Updated_11
Changed files:
- LSTM_model.keras +0 -0
- historical_news.ipynb → Stocks news prediction/Notebooks/1_historical_news.ipynb +25 -47
- Stocks news prediction/Notebooks/2_historical_stock.ipynb +127 -0
- news_preprocessing.ipynb → Stocks news prediction/Notebooks/3_news_preprocessing.ipynb +5 -1
- stock_preprocessing.ipynb → Stocks news prediction/Notebooks/4_stock_preprocessing.ipynb +31 -348
- Stocks news prediction/Notebooks/5_feature_pipeline.ipynb +493 -0
- feature_view.ipynb → Stocks news prediction/Notebooks/6_feature_view.ipynb +38 -126
- Stocks news prediction/Notebooks/7_training_pipeline.ipynb +839 -0
- Stocks news prediction/Notebooks/8_inference_pipeline.ipynb +315 -0
- Stocks news prediction/SML/__pycache__/feature_pipeline.cpython-311.pyc +0 -0
- Stocks news prediction/SML/__pycache__/news_preprocessing.cpython-311.pyc +0 -0
- feature_pipeline.py → Stocks news prediction/SML/feature_pipeline.py +20 -61
- feature_view.py → Stocks news prediction/SML/feature_view.py +11 -37
- Stocks news prediction/SML/historical_news.py +120 -0
- Stocks news prediction/SML/historical_stock.py +51 -0
- news_preprocessing.py → Stocks news prediction/SML/news_preprocessing.py +7 -3
- stock_preprocessing.py → Stocks news prediction/SML/stock_preprocessing.py +17 -21
- Stocks news prediction/SML/training_pipeline.py +256 -0
- TSLA_stock_price.csv → Stocks news prediction/TSLA_stock_price.csv +0 -0
- news_articles.csv → Stocks news prediction/news_articles.csv +0 -0
- news_articles_ema.csv → Stocks news prediction/news_articles_ema.csv +0 -0
- feature_engineering.ipynb +0 -73
- feature_pipeline.ipynb +0 -775
- feature_view_freddie.py +0 -95
- historical_stock.ipynb +0 -257
- requirements.txt +1 -0
- training_pipeline.ipynb +0 -167
LSTM_model.keras
DELETED
Binary file (291 kB)
historical_news.ipynb → Stocks news prediction/Notebooks/1_historical_news.ipynb
RENAMED

@@ -1,20 +1,5 @@
 {
  "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from dotenv import load_dotenv\n",
-    "from datetime import datetime, timedelta\n",
-    "import requests\n",
-    "import os\n",
-    "import time\n",
-    "import pandas as pd \n",
-    "from news_preprocessing import *"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": 2,
@@ -32,6 +17,14 @@
    }
   ],
   "source": [
+   "#Importing necessary libraries\n",
+   "from dotenv import load_dotenv\n",
+   "from datetime import datetime, timedelta\n",
+   "import requests\n",
+   "import os\n",
+   "import time\n",
+   "import pandas as pd \n",
+   "from SML import news_preprocessing #Importing everything from 'news_preprocessing'\n",
    "load_dotenv()"
   ]
  },
@@ -44,32 +37,23 @@
    "name": "stdout",
    "output_type": "stream",
    "text": [
-    "Fetched 50 articles from 2022-05-
-    "Fetched 50 articles from 2022-06-
-    "Fetched 50 articles from 2022-08-
-    "Fetched 50 articles from 2022-10-
-    "Fetched 50 articles from 2022-11-
+    "Fetched 50 articles from 2022-05-07 to 2022-06-26\n",
+    "Fetched 50 articles from 2022-06-27 to 2022-08-16\n",
+    "Fetched 50 articles from 2022-08-17 to 2022-10-06\n",
+    "Fetched 50 articles from 2022-10-07 to 2022-11-26\n",
+    "Fetched 50 articles from 2022-11-27 to 2023-01-16\n",
     "Rate limit reached. Waiting to retry...\n",
-    "Fetched 50 articles from 2023-01-16 to 2023-03-07\n",
-    "Fetched 50 articles from 2023-03-08 to 2023-04-27\n",
-    "Fetched 50 articles from 2023-04-28 to 2023-06-17\n",
-    "Fetched 50 articles from 2023-06-18 to 2023-08-07\n",
-    "Fetched 50 articles from 2023-08-08 to 2023-09-27\n",
-    "Fetched 50 articles from 2023-
-    "Fetched 50 articles from 2023-
-    "Fetched 50 articles from
-    "Fetched 50 articles from
-    "Fetched 50 articles from
-    "
+    "Fetched 50 articles from 2023-01-17 to 2023-03-08\n",
+    "Fetched 50 articles from 2023-03-09 to 2023-04-28\n",
+    "Fetched 50 articles from 2023-04-29 to 2023-06-18\n",
+    "Fetched 50 articles from 2023-06-19 to 2023-08-08\n",
+    "Fetched 50 articles from 2023-08-09 to 2023-09-28\n",
+    "Rate limit reached. Waiting to retry...\n"
    ]
   }
  ],
  "source": [
-   "
-   "import requests\n",
-   "from datetime import datetime, timedelta\n",
-   "import pandas as pd\n",
+   "#Defining a function for fetching news\n",
    "\n",
    "def fetch_news(api_key, ticker, start_date, end_date):\n",
    "    base_url = os.environ.get(\"endpointnewsp\")\n",
@@ -97,10 +81,10 @@
    "    data = response.json()\n",
    "    articles = data.get('results', [])\n",
    "    \n",
-   "    #
+   "    # Creating a DataFrame from articles\n",
    "    df = pd.DataFrame(articles)\n",
    "    \n",
-   "    #
+   "    # Adding primary_key column if ticker is found\n",
    "    df['ticker'] = df['tickers'].apply(lambda x: ticker if ticker in x else None)\n",
    "    \n",
    "    all_news.append(df) # Append DataFrame to the list\n",
@@ -119,7 +103,7 @@
    "\n",
    "    return pd.concat(all_news, ignore_index=True)\n",
    "\n",
-   "#
+   "#Usage\n",
    "api_key = os.environ.get('newsp_api')\n",
    "ticker = 'TSLA'\n",
    "end_date = datetime.now() - timedelta(days=1) # Yesterday's date\n",
@@ -263,7 +247,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "
+   "#Putting the news articles into a csv\n",
+   "df.to_csv('news_articles.csv', index=False)"
   ]
  },
  {
@@ -638,13 +623,6 @@
   "source": [
    "df_processed.head()"
   ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "metadata": {},
-  "outputs": [],
-  "source": []
  }
 ],
 "metadata": {
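For reference, below is a minimal, self-contained sketch of the windowed fetch-and-retry loop the rewritten fetch_news cell implements. It assumes a Polygon-style news endpoint URL stored in the endpointnewsp environment variable and Polygon-style query parameters (published_utc.gte/.lte, limit, apiKey); the 50-day window is an assumption inferred from the date ranges in the logged output, not confirmed by the diff.

    import os
    import time
    from datetime import datetime, timedelta

    import pandas as pd
    import requests

    def fetch_news(api_key, ticker, start_date, end_date):
        base_url = os.environ.get("endpointnewsp")  # assumed Polygon-style news endpoint
        all_news = []
        current = start_date
        while current < end_date:
            window_end = min(current + timedelta(days=50), end_date)  # assumed window size
            params = {
                "ticker": ticker,
                "published_utc.gte": current.strftime("%Y-%m-%d"),
                "published_utc.lte": window_end.strftime("%Y-%m-%d"),
                "limit": 50,
                "apiKey": api_key,
            }
            response = requests.get(base_url, params=params)
            if response.status_code == 429:
                # Rate limit reached: wait, then retry the same window
                print("Rate limit reached. Waiting to retry...")
                time.sleep(60)
                continue
            articles = response.json().get("results", [])
            if articles:
                df = pd.DataFrame(articles)
                # Keep a plain ticker column for use as a primary key downstream
                df["ticker"] = df["tickers"].apply(lambda x: ticker if ticker in x else None)
                all_news.append(df)
            print(f"Fetched {len(articles)} articles from {params['published_utc.gte']} "
                  f"to {params['published_utc.lte']}")
            current = window_end + timedelta(days=1)
        return pd.concat(all_news, ignore_index=True)

    api_key = os.environ.get("newsp_api")
    end_date = datetime.now() - timedelta(days=1)   # yesterday, as in the notebook
    start_date = end_date - timedelta(days=730)     # roughly two years back
    news_df = fetch_news(api_key, "TSLA", start_date, end_date)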
Stocks news prediction/Notebooks/2_historical_stock.ipynb
ADDED

@@ -0,0 +1,127 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#Importing necessary libraries\n",
+    "from dotenv import load_dotenv\n",
+    "import os \n",
+    "from alpha_vantage.timeseries import TimeSeries\n",
+    "import pandas as pd\n",
+    "import hopsworks\n",
+    "import re \n",
+    "import modal \n",
+    "#preprocessing\n",
+    "import requests\n",
+    "import pandas as pd\n",
+    "import json\n",
+    "#import pandas_market_calendars as mcal\n",
+    "import datetime\n",
+    "import numpy as np\n",
+    "from datetime import timedelta\n",
+    "load_dotenv() #Making the .env file work"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "            1. open  2. high    3. low  4. close    5. volume ticker\n",
+      "date\n",
+      "2024-05-03   182.10   184.78  178.4200    181.19   75491539.0   TSLA\n",
+      "2024-05-02   182.86   184.60  176.0200    180.01   89148041.0   TSLA\n",
+      "2024-05-01   182.00   185.86  179.0100    179.99   92829719.0   TSLA\n",
+      "2024-04-30   186.98   190.95  182.8401    183.28  127031787.0   TSLA\n",
+      "2024-04-29   188.42   198.87  184.5400    194.05  243869678.0   TSLA\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Setting up the API key to be able to fetch stocks from Alpha Vantage\n",
+    "\n",
+    "api_key = os.environ.get('stocks_api') \n",
+    "ts = TimeSeries(key=api_key, output_format='pandas')\n",
+    "\n",
+    "#Defining a function to fetch stocks\n",
+    "\n",
+    "def fetch_stock_prices(symbol):\n",
+    "    # Fetch daily adjusted stock prices; adjust the symbol as needed\n",
+    "    data, meta_data = ts.get_daily(symbol=symbol, outputsize='full')\n",
+    "    \n",
+    "    # Add a new column named 'ticker' and fill it with the ticker name\n",
+    "    data['ticker'] = symbol\n",
+    "    \n",
+    "    return data\n",
+    "\n",
+    "#Usage\n",
+    "symbol = 'TSLA'\n",
+    "stock_data = fetch_stock_prices(symbol)\n",
+    "print(stock_data.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Data saved to TSLA_stock_price.csv\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Defining the file path and name\n",
+    "file_path = 'TSLA_stock_price.csv' \n",
+    "\n",
+    "# Saving the DataFrame to CSV\n",
+    "stock_data.to_csv(file_path)\n",
+    "\n",
+    "print(f\"Data saved to {file_path}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
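The Alpha Vantage call above is the whole fetch path for stock prices. Condensed out of the notebook JSON, the working core looks like this (a sketch using only the calls shown in the diff; with output_format='pandas', get_daily returns a (DataFrame, metadata) pair indexed by date):

    import os
    from dotenv import load_dotenv
    from alpha_vantage.timeseries import TimeSeries

    load_dotenv()
    ts = TimeSeries(key=os.environ.get("stocks_api"), output_format="pandas")

    def fetch_stock_prices(symbol: str):
        # 'full' returns the complete daily history rather than the last 100 points
        data, meta_data = ts.get_daily(symbol=symbol, outputsize="full")
        data["ticker"] = symbol  # tag rows so the ticker can serve as a key later
        return data

    stock_data = fetch_stock_prices("TSLA")
    stock_data.to_csv("TSLA_stock_price.csv")  # the date index is written as the first column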
news_preprocessing.ipynb → Stocks news prediction/Notebooks/3_news_preprocessing.ipynb
RENAMED

@@ -6,6 +6,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
+   "#Importing necessary libraries\n",
    "from dotenv import load_dotenv\n",
    "from datetime import datetime, timedelta\n",
    "import requests\n",
@@ -21,6 +22,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
+   "#Defining a function to process news articles\n",
    "def process_news_articles(news_articles):\n",
    "    # Convert list of dictionaries to DataFrame\n",
    "    df = pd.DataFrame(news_articles)\n",
@@ -40,7 +42,7 @@
    "    df['date'] = df['published_utc'].dt.date\n",
    "    df['time'] = df['published_utc'].dt.time\n",
    "\n",
-   "    #
+   "    # Dropping unnecessary columns\n",
    "    df.drop(['published_utc'], axis=1, inplace=True)\n",
    "    # set date to index\n",
    "    df = df.set_index(\"date\")\n",
@@ -57,6 +59,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
+   "#Defining a function for the exponential moving average\n",
+   "\n",
    "def exponential_moving_average(df, window):\n",
    "    # Calculate EMA on the 'sentiment' column\n",
    "    df[f'exp_mean_{window}_days'] = df['sentiment'].ewm(span=window, adjust=False).mean()\n",
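The EMA cell above is worth spelling out: with adjust=False, pandas computes the recursive form y[t] = a*x[t] + (1-a)*y[t-1] with a = 2/(window+1), so each day's smoothed sentiment blends the new score with the previous smoothed value. A runnable sketch (the demo values are illustrative only):

    import pandas as pd

    def exponential_moving_average(df: pd.DataFrame, window: int) -> pd.DataFrame:
        # Smoothing factor a = 2 / (window + 1); adjust=False gives the recursive EMA
        df[f"exp_mean_{window}_days"] = df["sentiment"].ewm(span=window, adjust=False).mean()
        return df

    demo = pd.DataFrame({"sentiment": [0.2, -0.1, 0.4, 0.0]})
    print(exponential_moving_average(demo, window=7))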
stock_preprocessing.ipynb → Stocks news prediction/Notebooks/4_stock_preprocessing.ipynb
RENAMED

@@ -2,10 +2,22 @@
  "cells": [
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 2,
   "metadata": {},
-  "outputs": [
+  "outputs": [
+   {
+    "data": {
+     "text/plain": [
+      "True"
+     ]
+    },
+    "execution_count": 2,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
   "source": [
+   "#Importing necessary libraries\n",
    "from dotenv import load_dotenv\n",
    "import os \n",
    "from alpha_vantage.timeseries import TimeSeries\n",
@@ -20,7 +32,8 @@
    "import pandas_market_calendars as mcal\n",
    "import datetime\n",
    "import numpy as np\n",
-   "from datetime import datetime, timedelta\n"
+   "from datetime import datetime, timedelta\n",
+   "load_dotenv()"
   ]
  },
  {
@@ -43,8 +56,7 @@
   }
  ],
  "source": [
-  "
-  "\n",
+  "#Connecting to Alpha vantage using API key\n",
   "api_key = os.environ.get('stocks_api') # Replace this with your actual API key\n",
   "ts = TimeSeries(key=api_key, output_format='pandas')\n",
   "\n",
@@ -54,168 +66,6 @@
   "print(data.head())"
  ]
 },
- {
-  "cell_type": "code",
-  "execution_count": 3,
-  "metadata": {},
-  "outputs": [
-   {
-    "data": {
-     "text/html": [ ... rendered HTML preview of the data DataFrame (3485 rows × 5 columns) ... ],
-     "text/plain": [ ... "[3485 rows x 5 columns]" ... ]
-    },
-    "execution_count": 3,
-    "metadata": {},
-    "output_type": "execute_result"
-   }
-  ],
-  "source": [
-   "data"
-  ]
- },
 {
  "cell_type": "code",
  "execution_count": 4,
@@ -241,6 +91,7 @@
   }
  ],
  "source": [
+  "#Looking at data info\n",
   "data.info()"
  ]
 },
@@ -265,6 +116,7 @@
   }
  ],
  "source": [
+  "#Looking at the meta data\n",
   "meta_data"
  ]
 },
@@ -293,6 +145,7 @@
  "metadata": {},
  "outputs": [],
  "source": [
+  "#Defining a function to find the next business day\n",
   "def next_business_day(today):\n",
   "    \n",
   "    # Real tomorrow\n",
@@ -320,6 +173,7 @@
  "metadata": {},
  "outputs": [],
  "source": [
+  "#Defining a function to extract business day\n",
   "def extract_business_day(start_date,end_date):\n",
   "    \"\"\"\n",
   "    Given a start_date and end_date.\n",
@@ -331,27 +185,27 @@
   "    e.g is_open = [1,0,...,1] means that start_date = open, day after start_date = closed, and end_date = open\n",
   "    \"\"\"\n",
   "    \n",
-  "    #
+  "    # Saving for later\n",
   "    end_date_save = end_date\n",
   "    \n",
-  "    #
+  "    # Getting the NYSE calendar\n",
   "    cal = mcal.get_calendar('NYSE')\n",
   "\n",
-  "    #
+  "    # Getting the NYSE calendar's open and close times for the specified period\n",
   "    schedule = cal.schedule(start_date=start_date, end_date=end_date)\n",
   "    \n",
   "    # Only need a list of dates when it's open (not open and close times)\n",
   "    isBusinessDay = np.array(schedule.market_open.dt.strftime('%Y-%m-%d')) \n",
   "    \n",
-  "    #
+  "    # Going over all days: \n",
   "    delta = datetime.timedelta(days=1)\n",
   "    start_date = datetime.datetime.strptime(start_date,\"%Y-%m-%d\") #datetime.date(2015, 7, 16)\n",
   "    end_date = datetime.datetime.strptime(end_date,\"%Y-%m-%d\") #datetime.date(2023, 1, 4)\n",
   "    \n",
-  "    #
+  "    # Extracting days from the timedelta object\n",
   "    num_days = (end_date - start_date).days + 1\n",
   "    \n",
-  "    #
+  "    # Creating a boolean array for days being open (1) and closed (0) \n",
   "    is_open = np.zeros(num_days)\n",
   "    \n",
   "    # iterate over range of dates\n",
@@ -386,6 +240,7 @@
  "metadata": {},
  "outputs": [],
  "source": [
+  "#Defining a function to clean the column names\n",
   "def clean_column_name(name):\n",
   "    # Remove all non-letter characters\n",
   "    cleaned_name = re.sub(r'[^a-zA-Z]', '', name)\n",
@@ -617,178 +472,13 @@
   "data.head()"
  ]
 },
- {
-  "cell_type": "code",
-  "execution_count": 13,
-  "metadata": {},
-  "outputs": [
-   {
-    "data": {
-     "text/html": [ ... rendered HTML preview of the data DataFrame (3485 rows × 6 columns) ... ],
-     "text/plain": [ ... "[3485 rows x 6 columns]" ... ]
-    },
-    "execution_count": 13,
-    "metadata": {},
-    "output_type": "execute_result"
-   }
-  ],
-  "source": [
-   "data"
-  ]
- },
 {
  "cell_type": "code",
  "execution_count": 42,
  "metadata": {},
  "outputs": [],
  "source": [
-  "# Define the date range
+  "# Define the date range we're interested in\n",
   "yesterday =datetime.now()-timedelta(days=1)\n",
   "two_years_back = yesterday - timedelta(days=684)"
  ]
@@ -799,7 +489,7 @@
  "metadata": {},
  "outputs": [],
  "source": [
-  "#
+  "# Filtering the DataFrame to this range\n",
   "filtered_df = data[(data['date'] >= two_years_back) & (data['date'] <= yesterday)]"
  ]
 },
@@ -943,13 +633,6 @@
  "source": [
   "filtered_df.shape"
  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "metadata": {},
-  "outputs": [],
-  "source": []
  }
 ],
 "metadata": {
@@ -968,7 +651,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.11.
+ "version": "3.11.9"
 },
 "orig_nbformat": 4
},
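The extract_business_day function above builds a per-day open/closed mask from the NYSE calendar. A compact, runnable sketch of the same idea, using only the pandas_market_calendars calls the diff shows (the set-based lookup is a small simplification of the notebook's loop):

    import datetime
    import numpy as np
    import pandas_market_calendars as mcal

    def extract_business_day(start_date: str, end_date: str) -> np.ndarray:
        # schedule() returns one row per NYSE trading day in [start_date, end_date]
        cal = mcal.get_calendar("NYSE")
        schedule = cal.schedule(start_date=start_date, end_date=end_date)
        open_days = set(schedule.market_open.dt.strftime("%Y-%m-%d"))

        start = datetime.datetime.strptime(start_date, "%Y-%m-%d")
        end = datetime.datetime.strptime(end_date, "%Y-%m-%d")
        num_days = (end - start).days + 1

        # 1 where the exchange was open, 0 where it was closed
        is_open = np.zeros(num_days)
        for i in range(num_days):
            day = (start + datetime.timedelta(days=i)).strftime("%Y-%m-%d")
            if day in open_days:
                is_open[i] = 1
        return is_open

    print(extract_business_day("2024-04-26", "2024-05-03"))  # weekend entries come back as 0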
Stocks news prediction/Notebooks/5_feature_pipeline.ipynb
ADDED

@@ -0,0 +1,493 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Connection closed.\n",
+      "Connected. Call `.close()` to terminate connection gracefully.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
+      "Connected. Call `.close()` to terminate connection gracefully.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Import necessary libraries\n",
+    "import pandas as pd # For data manipulation using DataFrames\n",
+    "import numpy as np # For numerical operations\n",
+    "import matplotlib.pyplot as plt # For data visualization\n",
+    "import os # For operating system-related tasks\n",
+    "import joblib # For saving and loading models\n",
+    "import hopsworks # For getting access to hopsworks\n",
+    "import re\n",
+    "\n",
+    "# Import specific modules from scikit-learn\n",
+    "from sklearn.preprocessing import StandardScaler, OneHotEncoder # For data preprocessing\n",
+    "from sklearn.metrics import accuracy_score # For evaluating model accuracy\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "import os\n",
+    "load_dotenv()\n",
+    "\n",
+    "#Connecting to hopsworks\n",
+    "api_key = os.environ.get('hopsworks_api')\n",
+    "project = hopsworks.login(api_key_value=api_key)\n",
+    "fs = project.get_feature_store()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "         date  1. open  2. high    3. low  4. close    5. volume ticker\n",
+      "0  2024-05-03   182.10   184.78  178.4200    181.19   75491539.0   TSLA\n",
+      "1  2024-05-02   182.86   184.60  176.0200    180.01   89148041.0   TSLA\n",
+      "2  2024-05-01   182.00   185.86  179.0100    179.99   92829719.0   TSLA\n",
+      "3  2024-04-30   186.98   190.95  182.8401    183.28  127031787.0   TSLA\n",
+      "4  2024-04-29   188.42   198.87  184.5400    194.05  243869678.0   TSLA\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load and display the data from CSV to confirm\n",
+    "tsla_df = pd.read_csv('TSLA_stock_price.csv')\n",
+    "print(tsla_df.head()) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Defining a function to clean the column names\n",
+    "def clean_column_name(name):\n",
+    "    # Remove all non-letter characters\n",
+    "    cleaned_name = re.sub(r'[^a-zA-Z]', '', name)\n",
+    "    return cleaned_name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [ ... rendered HTML preview of tsla_df (3486 rows × 7 columns) ... ],
+      "text/plain": [ ... "[3486 rows x 7 columns]" ... ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tsla_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Cleaning up column names for 'tsla_df'\n",
+    "tsla_df.columns = [clean_column_name(col) for col in tsla_df.columns]\n",
+    "print(tsla_df.columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Converting the \"date\" column to timestamp\n",
+    "tsla_df['date'] = pd.to_datetime(tsla_df['date'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Defining the stocks feature group\n",
+    "tesla_fg = fs.get_or_create_feature_group(\n",
+    "    name=\"tesla_stock\",\n",
+    "    description=\"Tesla stock dataset from alpha vantage\",\n",
+    "    version=1,\n",
+    "    primary_key=[\"ticker\"],\n",
+    "    event_time=['date'],\n",
+    "    online_enabled=False,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Feature Group created successfully, explore it at \n",
+      "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/786781\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b3248b9d522a467db9ce202ef5815fe9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Uploading Dataframe: 0.00% |          | Rows 0/3486 | Elapsed Time: 00:00 | Remaining Time: ?"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Launching job: tesla_stock_1_offline_fg_materialization\n",
+      "Job started successfully, you can follow the progress at \n",
+      "https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stock_1_offline_fg_materialization/executions\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(<hsfs.core.job.Job at 0x19cffe27490>, None)"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#Inserting the stock data into the stocks feature group\n",
+    "tesla_fg.insert(tsla_df, write_options={\"wait_for_job\" : False})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Collecting news df\n",
+    "news_df = pd.read_csv('news_articles_ema.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Dropping exp mean 7 days\n",
+    "news_df_updated = news_df.drop(columns=['exp_mean_7_days'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Updating date to datetime\n",
+    "news_df_updated['date'] = pd.to_datetime(news_df_updated['date'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2024-05-06 13:43:12,343 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Defining the news feature group\n",
+    "news_sentiment_fg = fs.get_or_create_feature_group(\n",
+    "    name='news_sentiment_updated',\n",
+    "    description='News sentiment from Polygon',\n",
+    "    version=1,\n",
+    "    primary_key=['ticker'],\n",
+    "    event_time=['date'],\n",
+    "    online_enabled=False,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Feature Group created successfully, explore it at \n",
+      "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/787796\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "524bb5481c34441ba708a4c14edac44b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Uploading Dataframe: 0.00% |          | Rows 0/66 | Elapsed Time: 00:00 | Remaining Time: ?"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Launching job: news_sentiment_updated_1_offline_fg_materialization\n",
+      "Job started successfully, you can follow the progress at \n",
+      "https://c.app.hopsworks.ai/p/693399/jobs/named/news_sentiment_updated_1_offline_fg_materialization/executions\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(<hsfs.core.job.Job at 0x19c811c2e90>, None)"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#Inserting the news data into the news feature group\n",
+    "news_sentiment_fg.insert(news_df_updated)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
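Stripped of notebook JSON, the write path this pipeline follows is short. A sketch using only the Hopsworks calls shown above; note the run logged a DeprecationWarning for passing event_time as a single-element list, so the sketch passes the feature name as a plain string instead:

    import os
    import pandas as pd
    import hopsworks
    from dotenv import load_dotenv

    load_dotenv()
    project = hopsworks.login(api_key_value=os.environ.get("hopsworks_api"))
    fs = project.get_feature_store()

    tsla_df = pd.read_csv("TSLA_stock_price.csv")
    tsla_df["date"] = pd.to_datetime(tsla_df["date"])

    tesla_fg = fs.get_or_create_feature_group(
        name="tesla_stock",
        description="Tesla stock dataset from alpha vantage",
        version=1,
        primary_key=["ticker"],
        event_time="date",  # string form avoids the deprecation warning seen in the log
        online_enabled=False,
    )
    # Returns immediately; the offline materialization job runs on Hopsworks
    tesla_fg.insert(tsla_df, write_options={"wait_for_job": False})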
feature_view.ipynb β Stocks news prediction/Notebooks/6_feature_view.ipynb
RENAMED
@@ -2,55 +2,31 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
-
"metadata": {},
|
7 |
-
"outputs": [],
|
8 |
-
"source": [
|
9 |
-
"# Import necessary libraries\n",
|
10 |
-
"import pandas as pd # For data manipulation using DataFrames\n",
|
11 |
-
"import numpy as np # For numerical operations\n",
|
12 |
-
"import matplotlib.pyplot as plt # For data visualization\n",
|
13 |
-
"import os # For operating system-related tasks\n",
|
14 |
-
"import joblib # For saving and loading models\n",
|
15 |
-
"import hopsworks # For getting access to hopsworks\n",
|
16 |
-
"\n",
|
17 |
-
"\n",
|
18 |
-
"\n",
|
19 |
-
"# Import specific modules from scikit-learn\n",
|
20 |
-
"from sklearn.preprocessing import StandardScaler, OneHotEncoder # For data preprocessing\n",
|
21 |
-
"from sklearn.metrics import accuracy_score # For evaluating model accuracy"
|
22 |
-
]
|
23 |
-
},
|
24 |
-
{
|
25 |
-
"cell_type": "code",
|
26 |
-
"execution_count": 2,
|
27 |
"metadata": {},
|
28 |
"outputs": [
|
29 |
{
|
30 |
"name": "stdout",
|
31 |
"output_type": "stream",
|
32 |
"text": [
|
|
|
|
|
|
|
|
|
|
|
33 |
" date 1. open 2. high 3. low 4. close 5. volume ticker\n",
|
34 |
"0 2024-05-03 182.10 184.78 178.4200 181.19 75491539.0 TSLA\n",
|
35 |
"1 2024-05-02 182.86 184.60 176.0200 180.01 89148041.0 TSLA\n",
|
36 |
"2 2024-05-01 182.00 185.86 179.0100 179.99 92829719.0 TSLA\n",
|
37 |
"3 2024-04-30 186.98 190.95 182.8401 183.28 127031787.0 TSLA\n",
|
38 |
"4 2024-04-29 188.42 198.87 184.5400 194.05 243869678.0 TSLA\n",
|
39 |
-
"
|
40 |
-
"\n",
|
41 |
-
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
|
42 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
43 |
-
"Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')\n",
|
44 |
-
"2024-05-06 13:44:59,122 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n",
|
45 |
-
"\n",
|
46 |
-
"Feature Group created successfully, explore it at \n",
|
47 |
-
"https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/787797\n"
|
48 |
]
|
49 |
},
|
50 |
{
|
51 |
"data": {
|
52 |
"application/vnd.jupyter.widget-view+json": {
|
53 |
-
"model_id": "
|
54 |
"version_major": 2,
|
55 |
"version_minor": 0
|
56 |
},
|
@@ -65,19 +41,15 @@
|
|
65 |
"name": "stdout",
|
66 |
"output_type": "stream",
|
67 |
"text": [
|
68 |
-
"Launching job:
|
69 |
"Job started successfully, you can follow the progress at \n",
|
70 |
-
"https://c.app.hopsworks.ai/p/693399/jobs/named/
|
71 |
-
"2024-05-06 13:45:08,516 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n",
|
72 |
-
"\n",
|
73 |
-
"Feature Group created successfully, explore it at \n",
|
74 |
-
"https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/785786\n"
|
75 |
]
|
76 |
},
|
77 |
{
|
78 |
"data": {
|
79 |
"application/vnd.jupyter.widget-view+json": {
|
80 |
-
"model_id": "
|
81 |
"version_major": 2,
|
82 |
"version_minor": 0
|
83 |
},
|
@@ -92,57 +64,11 @@
|
|
92 |
"name": "stdout",
|
93 |
"output_type": "stream",
|
94 |
"text": [
|
95 |
-
"Launching job:
|
96 |
"Job started successfully, you can follow the progress at \n",
|
97 |
-
"https://c.app.hopsworks.ai/p/693399/jobs/named/
|
98 |
-
]
|
99 |
-
}
|
100 |
-
],
|
101 |
-
"source": [
|
102 |
-
"from feature_pipeline import tesla_fg\n",
|
103 |
-
"from feature_pipeline import news_sentiment_fg"
|
104 |
-
]
|
105 |
-
},
|
106 |
-
{
|
107 |
-
"cell_type": "code",
|
108 |
-
"execution_count": 3,
|
109 |
-
"metadata": {},
|
110 |
-
"outputs": [
|
111 |
-
{
|
112 |
-
"data": {
|
113 |
-
"text/plain": [
|
114 |
-
"True"
|
115 |
-
]
|
116 |
-
},
|
117 |
-
"execution_count": 3,
|
118 |
-
"metadata": {},
|
119 |
-
"output_type": "execute_result"
|
120 |
-
}
|
121 |
-
],
|
122 |
-
"source": [
|
123 |
-
"from dotenv import load_dotenv\n",
|
124 |
-
"import os\n",
|
125 |
-
"\n",
|
126 |
-
"load_dotenv()"
|
127 |
-
]
|
128 |
-
},
|
129 |
-
{
|
130 |
-
"cell_type": "code",
|
131 |
-
"execution_count": 4,
|
132 |
-
"metadata": {},
|
133 |
-
"outputs": [
|
134 |
-
{
|
135 |
-
"name": "stdout",
|
136 |
-
"output_type": "stream",
|
137 |
-
"text": [
|
138 |
"Connection closed.\n",
|
139 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
140 |
-
]
|
141 |
-
},
|
142 |
-
{
|
143 |
-
"name": "stdout",
|
144 |
-
"output_type": "stream",
|
145 |
-
"text": [
|
146 |
"\n",
|
147 |
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
|
148 |
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
@@ -150,6 +76,23 @@
|
|
150 |
}
|
151 |
],
|
152 |
"source": [
|
153 |
"api_key = os.environ.get('hopsworks_api')\n",
|
154 |
"project = hopsworks.login(api_key_value=api_key)\n",
|
155 |
"fs = project.get_feature_store()"
|
@@ -161,17 +104,19 @@
|
|
161 |
"metadata": {},
|
162 |
"outputs": [],
|
163 |
"source": [
|
164 |
"def create_stocks_feature_view(fs, version):\n",
|
165 |
"\n",
|
166 |
" # Loading in the feature groups\n",
|
167 |
" tesla_fg = fs.get_feature_group('tesla_stock', version=1)\n",
|
168 |
" news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=1)\n",
|
169 |
"\n",
|
170 |
-
" #
|
171 |
" ds_query = tesla_fg.select(['date', 'open', 'ticker'])\\\n",
|
172 |
" .join(news_sentiment_fg.select(['sentiment']))\n",
|
173 |
"\n",
|
174 |
-
" #
|
175 |
" feature_view = fs.create_feature_view(\n",
|
176 |
" name='tesla_stocks_fv',\n",
|
177 |
" query=ds_query,\n",
|
@@ -196,6 +141,7 @@
|
|
196 |
}
|
197 |
],
|
198 |
"source": [
|
|
|
199 |
"try:\n",
|
200 |
" feature_view = fs.get_feature_view(\"tesla_stocks_fv\", version=1)\n",
|
201 |
" tesla_fg = fs.get_feature_group('tesla_stock', version=1)\n",
|
@@ -209,6 +155,7 @@
|
|
209 |
"metadata": {},
|
210 |
"outputs": [],
|
211 |
"source": [
|
|
|
212 |
"def fix_data_from_feature_view(df,start_date,end_date):\n",
|
213 |
" df = df.sort_values(\"date\")\n",
|
214 |
" df = df.reset_index()\n",
|
@@ -230,41 +177,6 @@
|
|
230 |
" \n",
|
231 |
" return filtered_df"
|
232 |
]
|
233 |
-
},
|
234 |
-
{
|
235 |
-
"cell_type": "code",
|
236 |
-
"execution_count": 7,
|
237 |
-
"metadata": {},
|
238 |
-
"outputs": [],
|
239 |
-
"source": [
|
240 |
-
"#def create_stocks_feature_view(fs, version):\n",
|
241 |
-
"\n",
|
242 |
-
" #Loading in the feature groups\n",
|
243 |
-
"# tesla_fg = fs.get_feature_group('tesla_stock', version = 3)\n",
|
244 |
-
"# news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version = 2)\n",
|
245 |
-
"\n",
|
246 |
-
"# ds_query = tesla_fg.select(['date','open', 'ticker'])\\\n",
|
247 |
-
"# .join(news_sentiment_fg.select_except(['ticker','time', 'amp_url', 'image_url']))\n",
|
248 |
-
" \n",
|
249 |
-
"# return (fs.create_tesla_feature_view(\n",
|
250 |
-
"# name = 'tsla_stocks_fv',\n",
|
251 |
-
"# query = ds_query,\n",
|
252 |
-
"# labels=['ticker']\n",
|
253 |
-
"# ), tesla_fg)"
|
254 |
-
]
|
255 |
-
},
|
256 |
-
{
|
257 |
-
"cell_type": "code",
|
258 |
-
"execution_count": 8,
|
259 |
-
"metadata": {},
|
260 |
-
"outputs": [],
|
261 |
-
"source": [
|
262 |
-
"#try:\n",
|
263 |
-
"# feature_view = fs.get_feature_view(\"tsla_stocks_fv\", version=1)\n",
|
264 |
-
"# tesla_fg = fs.get_feature_group('tesla_stock', version=3)\n",
|
265 |
-
"#except:\n",
|
266 |
-
"# feature_view, tesla_fg = create_stocks_feature_view(fs, 1)"
|
267 |
-
]
|
268 |
}
|
269 |
],
|
270 |
"metadata": {
|
@@ -283,7 +195,7 @@
|
|
283 |
"name": "python",
|
284 |
"nbconvert_exporter": "python",
|
285 |
"pygments_lexer": "ipython3",
|
286 |
-
"version": "3.11.
|
287 |
}
|
288 |
},
|
289 |
"nbformat": 4,
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 3,
|
6 |
"metadata": {},
|
7 |
"outputs": [
|
8 |
{
|
9 |
"name": "stdout",
|
10 |
"output_type": "stream",
|
11 |
"text": [
|
12 |
+
"Connection closed.\n",
|
13 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
14 |
+
"\n",
|
15 |
+
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
|
16 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
17 |
" date 1. open 2. high 3. low 4. close 5. volume ticker\n",
|
18 |
"0 2024-05-03 182.10 184.78 178.4200 181.19 75491539.0 TSLA\n",
|
19 |
"1 2024-05-02 182.86 184.60 176.0200 180.01 89148041.0 TSLA\n",
|
20 |
"2 2024-05-01 182.00 185.86 179.0100 179.99 92829719.0 TSLA\n",
|
21 |
"3 2024-04-30 186.98 190.95 182.8401 183.28 127031787.0 TSLA\n",
|
22 |
"4 2024-04-29 188.42 198.87 184.5400 194.05 243869678.0 TSLA\n",
|
23 |
+
"Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')\n"
|
24 |
]
|
25 |
},
|
26 |
{
|
27 |
"data": {
|
28 |
"application/vnd.jupyter.widget-view+json": {
|
29 |
+
"model_id": "db4ef90d03b0464f957c18365d8d636f",
|
30 |
"version_major": 2,
|
31 |
"version_minor": 0
|
32 |
},
|
|
|
41 |
"name": "stdout",
|
42 |
"output_type": "stream",
|
43 |
"text": [
|
44 |
+
"Launching job: tesla_stock_1_offline_fg_materialization\n",
|
45 |
"Job started successfully, you can follow the progress at \n",
|
46 |
+
"https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stock_1_offline_fg_materialization/executions\n"
|
47 |
]
|
48 |
},
|
49 |
{
|
50 |
"data": {
|
51 |
"application/vnd.jupyter.widget-view+json": {
|
52 |
+
"model_id": "9043e7043c1843288091f7c3a6bbd83e",
|
53 |
"version_major": 2,
|
54 |
"version_minor": 0
|
55 |
},
|
|
|
64 |
"name": "stdout",
|
65 |
"output_type": "stream",
|
66 |
"text": [
|
67 |
+
"Launching job: news_sentiment_updated_1_offline_fg_materialization\n",
|
68 |
"Job started successfully, you can follow the progress at \n",
|
69 |
+
"https://c.app.hopsworks.ai/p/693399/jobs/named/news_sentiment_updated_1_offline_fg_materialization/executions\n",
|
70 |
"Connection closed.\n",
|
71 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
72 |
"\n",
|
73 |
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
|
74 |
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
|
|
76 |
}
|
77 |
],
|
78 |
"source": [
|
79 |
+
"# Importing necessary libraries\n",
|
80 |
+
"import pandas as pd # For data manipulation using DataFrames\n",
|
81 |
+
"import numpy as np # For numerical operations\n",
|
82 |
+
"import matplotlib.pyplot as plt # For data visualization\n",
|
83 |
+
"import os # For operating system-related tasks\n",
|
84 |
+
"import joblib # For saving and loading models\n",
|
85 |
+
"import hopsworks # For getting access to hopsworks\n",
|
86 |
+
"\n",
|
87 |
+
"from SML import feature_pipeline #Loading in the tesla_fg\n",
|
88 |
+
"\n",
|
89 |
+
"#Making the notebook able to fetch from the .env file\n",
|
90 |
+
"from dotenv import load_dotenv\n",
|
91 |
+
"import os\n",
|
92 |
+
"\n",
|
93 |
+
"load_dotenv()\n",
|
94 |
+
"\n",
|
95 |
+
"#Getting connected to hopsworks\n",
|
96 |
"api_key = os.environ.get('hopsworks_api')\n",
|
97 |
"project = hopsworks.login(api_key_value=api_key)\n",
|
98 |
"fs = project.get_feature_store()"
|
|
|
104 |
"metadata": {},
|
105 |
"outputs": [],
|
106 |
"source": [
|
107 |
+
"#Defining the function to create feature view\n",
|
108 |
+
"\n",
|
109 |
"def create_stocks_feature_view(fs, version):\n",
|
110 |
"\n",
|
111 |
" # Loading in the feature groups\n",
|
112 |
" tesla_fg = fs.get_feature_group('tesla_stock', version=1)\n",
|
113 |
" news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=1)\n",
|
114 |
"\n",
|
115 |
+
" # Defining the query\n",
|
116 |
" ds_query = tesla_fg.select(['date', 'open', 'ticker'])\\\n",
|
117 |
" .join(news_sentiment_fg.select(['sentiment']))\n",
|
118 |
"\n",
|
119 |
+
" # Creating the feature view\n",
|
120 |
" feature_view = fs.create_feature_view(\n",
|
121 |
" name='tesla_stocks_fv',\n",
|
122 |
" query=ds_query,\n",
|
|
|
141 |
}
|
142 |
],
|
143 |
"source": [
|
144 |
+
"#Creating the feature view\n",
|
145 |
"try:\n",
|
146 |
" feature_view = fs.get_feature_view(\"tesla_stocks_fv\", version=1)\n",
|
147 |
" tesla_fg = fs.get_feature_group('tesla_stock', version=1)\n",
|
|
|
155 |
"metadata": {},
|
156 |
"outputs": [],
|
157 |
"source": [
|
158 |
+
"#Defining a function to get fixed data from the feature view\n",
|
159 |
"def fix_data_from_feature_view(df,start_date,end_date):\n",
|
160 |
" df = df.sort_values(\"date\")\n",
|
161 |
" df = df.reset_index()\n",
|
|
|
177 |
" \n",
|
178 |
" return filtered_df"
|
179 |
]
|
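The hunk above elides the middle rows of fix_data_from_feature_view, so only its head and tail are visible. A minimal sketch of a date-window filter consistent with those visible lines; the helper name and the mask logic are assumptions, not the committed code:

# Sketch under assumptions: 'date' is already datetime and the window is inclusive
def filter_by_date_window(df, start_date, end_date):
    df = df.sort_values("date").reset_index(drop=True)
    mask = (df["date"] >= pd.to_datetime(start_date)) & (df["date"] <= pd.to_datetime(end_date))
    filtered_df = df.loc[mask].reset_index(drop=True)
    return filtered_df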
180 |
}
|
181 |
],
|
182 |
"metadata": {
|
|
|
195 |
"name": "python",
|
196 |
"nbconvert_exporter": "python",
|
197 |
"pygments_lexer": "ipython3",
|
198 |
+
"version": "3.11.9"
|
199 |
}
|
200 |
},
|
201 |
"nbformat": 4,
|
Stocks news prediction/Notebooks/7_training_pipeline.ipynb
ADDED
@@ -0,0 +1,839 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
13 |
+
"\n",
|
14 |
+
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
|
15 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
16 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
17 |
+
]
|
18 |
+
}
|
19 |
+
],
|
20 |
+
"source": [
|
21 |
+
"#Importing necessary libraries\n",
|
22 |
+
"import hopsworks\n",
|
23 |
+
"import hsfs\n",
|
24 |
+
"from dotenv import load_dotenv\n",
|
25 |
+
"import os\n",
|
26 |
+
"import pandas as pd\n",
|
27 |
+
"import numpy as np\n",
|
28 |
+
"from sklearn.preprocessing import OneHotEncoder\n",
|
29 |
+
"from sklearn.preprocessing import MinMaxScaler\n",
|
30 |
+
"from sklearn.metrics import mean_squared_error\n",
|
31 |
+
"from hsml.schema import Schema\n",
|
32 |
+
"from hsml.model_schema import ModelSchema\n",
|
33 |
+
"from tensorflow.keras.models import Sequential\n",
|
34 |
+
"from tensorflow.keras.layers import Input, LSTM, Dense, Dropout\n",
|
35 |
+
"from sklearn.preprocessing import StandardScaler # Import StandardScaler from scikit-learn\n",
|
36 |
+
"import joblib\n",
|
37 |
+
"\n",
|
38 |
+
"load_dotenv()\n",
|
39 |
+
"\n",
|
40 |
+
"#Connecting to hopsworks\n",
|
41 |
+
"api_key = os.environ.get('hopsworks_api')\n",
|
42 |
+
"project = hopsworks.login(api_key_value=api_key)\n",
|
43 |
+
"fs = project.get_feature_store()\n",
|
44 |
+
"\n",
|
45 |
+
"#Another connection to hopsworks\n",
|
46 |
+
"api_key = os.getenv('hopsworks_api')\n",
|
47 |
+
"connection = hsfs.connection()\n",
|
48 |
+
"fs = connection.get_feature_store()"
|
49 |
+
]
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"cell_type": "code",
|
53 |
+
"execution_count": 3,
|
54 |
+
"metadata": {},
|
55 |
+
"outputs": [],
|
56 |
+
"source": [
|
57 |
+
"#Getting the feature view\n",
|
58 |
+
"feature_view = fs.get_feature_view(\n",
|
59 |
+
" name='tesla_stocks_fv',\n",
|
60 |
+
" version=1\n",
|
61 |
+
")"
|
62 |
+
]
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"cell_type": "code",
|
66 |
+
"execution_count": 4,
|
67 |
+
"metadata": {},
|
68 |
+
"outputs": [],
|
69 |
+
"source": [
|
70 |
+
"#Setting up train & test split dates\n",
|
71 |
+
"train_start = \"2022-06-22\"\n",
|
72 |
+
"train_end = \"2023-12-31\"\n",
|
73 |
+
"\n",
|
74 |
+
"test_start = '2024-01-01'\n",
|
75 |
+
"test_end = \"2024-05-03\""
|
76 |
+
]
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"cell_type": "code",
|
80 |
+
"execution_count": 5,
|
81 |
+
"metadata": {},
|
82 |
+
"outputs": [
|
83 |
+
{
|
84 |
+
"name": "stdout",
|
85 |
+
"output_type": "stream",
|
86 |
+
"text": [
|
87 |
+
"Training dataset job started successfully, you can follow the progress at \n",
|
88 |
+
"https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stocks_fv_1_create_fv_td_07052024082715/executions\n",
|
89 |
+
"2024-05-07 10:28:31,852 WARNING: VersionWarning: Incremented version to `6`.\n",
|
90 |
+
"\n"
|
91 |
+
]
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"data": {
|
95 |
+
"text/plain": [
|
96 |
+
"(6, <hsfs.core.job.Job at 0x1c3ac2719d0>)"
|
97 |
+
]
|
98 |
+
},
|
99 |
+
"execution_count": 5,
|
100 |
+
"metadata": {},
|
101 |
+
"output_type": "execute_result"
|
102 |
+
}
|
103 |
+
],
|
104 |
+
"source": [
|
105 |
+
"#Creating the train/test split on the feature view with the split dates\n",
|
106 |
+
"feature_view.create_train_test_split(\n",
|
107 |
+
" train_start=train_start,\n",
|
108 |
+
" train_end=train_end,\n",
|
109 |
+
" test_start=test_start,\n",
|
110 |
+
" test_end=test_end,\n",
|
111 |
+
" data_format='csv',\n",
|
112 |
+
" coalesce= True,\n",
|
113 |
+
" statistics_config={'histogram':True,'correlations':True})"
|
114 |
+
]
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"cell_type": "code",
|
118 |
+
"execution_count": 6,
|
119 |
+
"metadata": {},
|
120 |
+
"outputs": [],
|
121 |
+
"source": [
|
122 |
+
"#Collecting the split from feature view\n",
|
123 |
+
"X_train, X_test, y_train, y_test = feature_view.get_train_test_split(6)"
|
124 |
+
]
|
125 |
+
},
|
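The literal 6 passed to get_train_test_split corresponds to the "Incremented version to `6`" warning above. A small sketch that reuses the version returned by create_train_test_split instead of hard-coding it (variable names are illustrative):

# create_train_test_split returns (version, job), as the (6, <Job>) output above shows
td_version, td_job = feature_view.create_train_test_split(
    train_start=train_start, train_end=train_end,
    test_start=test_start, test_end=test_end,
    data_format='csv', coalesce=True,
)
X_train, X_test, y_train, y_test = feature_view.get_train_test_split(td_version)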
126 |
+
{
|
127 |
+
"cell_type": "code",
|
128 |
+
"execution_count": 7,
|
129 |
+
"metadata": {},
|
130 |
+
"outputs": [
|
131 |
+
{
|
132 |
+
"data": {
|
133 |
+
"text/html": [
|
134 |
+
"<div>\n",
|
135 |
+
"<style scoped>\n",
|
136 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
137 |
+
" vertical-align: middle;\n",
|
138 |
+
" }\n",
|
139 |
+
"\n",
|
140 |
+
" .dataframe tbody tr th {\n",
|
141 |
+
" vertical-align: top;\n",
|
142 |
+
" }\n",
|
143 |
+
"\n",
|
144 |
+
" .dataframe thead th {\n",
|
145 |
+
" text-align: right;\n",
|
146 |
+
" }\n",
|
147 |
+
"</style>\n",
|
148 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
149 |
+
" <thead>\n",
|
150 |
+
" <tr style=\"text-align: right;\">\n",
|
151 |
+
" <th></th>\n",
|
152 |
+
" <th>date</th>\n",
|
153 |
+
" <th>ticker</th>\n",
|
154 |
+
" <th>sentiment</th>\n",
|
155 |
+
" </tr>\n",
|
156 |
+
" </thead>\n",
|
157 |
+
" <tbody>\n",
|
158 |
+
" <tr>\n",
|
159 |
+
" <th>0</th>\n",
|
160 |
+
" <td>2022-12-14T00:00:00.000Z</td>\n",
|
161 |
+
" <td>TSLA</td>\n",
|
162 |
+
" <td>0.102207</td>\n",
|
163 |
+
" </tr>\n",
|
164 |
+
" <tr>\n",
|
165 |
+
" <th>1</th>\n",
|
166 |
+
" <td>2023-02-21T00:00:00.000Z</td>\n",
|
167 |
+
" <td>TSLA</td>\n",
|
168 |
+
" <td>0.155833</td>\n",
|
169 |
+
" </tr>\n",
|
170 |
+
" <tr>\n",
|
171 |
+
" <th>2</th>\n",
|
172 |
+
" <td>2023-08-17T00:00:00.000Z</td>\n",
|
173 |
+
" <td>TSLA</td>\n",
|
174 |
+
" <td>0.024046</td>\n",
|
175 |
+
" </tr>\n",
|
176 |
+
" <tr>\n",
|
177 |
+
" <th>3</th>\n",
|
178 |
+
" <td>2022-09-16T00:00:00.000Z</td>\n",
|
179 |
+
" <td>TSLA</td>\n",
|
180 |
+
" <td>0.087306</td>\n",
|
181 |
+
" </tr>\n",
|
182 |
+
" <tr>\n",
|
183 |
+
" <th>4</th>\n",
|
184 |
+
" <td>2023-08-28T00:00:00.000Z</td>\n",
|
185 |
+
" <td>TSLA</td>\n",
|
186 |
+
" <td>0.024046</td>\n",
|
187 |
+
" </tr>\n",
|
188 |
+
" <tr>\n",
|
189 |
+
" <th>...</th>\n",
|
190 |
+
" <td>...</td>\n",
|
191 |
+
" <td>...</td>\n",
|
192 |
+
" <td>...</td>\n",
|
193 |
+
" </tr>\n",
|
194 |
+
" <tr>\n",
|
195 |
+
" <th>378</th>\n",
|
196 |
+
" <td>2023-02-10T00:00:00.000Z</td>\n",
|
197 |
+
" <td>TSLA</td>\n",
|
198 |
+
" <td>0.155833</td>\n",
|
199 |
+
" </tr>\n",
|
200 |
+
" <tr>\n",
|
201 |
+
" <th>379</th>\n",
|
202 |
+
" <td>2023-05-08T00:00:00.000Z</td>\n",
|
203 |
+
" <td>TSLA</td>\n",
|
204 |
+
" <td>0.141296</td>\n",
|
205 |
+
" </tr>\n",
|
206 |
+
" <tr>\n",
|
207 |
+
" <th>380</th>\n",
|
208 |
+
" <td>2022-09-08T00:00:00.000Z</td>\n",
|
209 |
+
" <td>TSLA</td>\n",
|
210 |
+
" <td>0.087306</td>\n",
|
211 |
+
" </tr>\n",
|
212 |
+
" <tr>\n",
|
213 |
+
" <th>381</th>\n",
|
214 |
+
" <td>2023-07-06T00:00:00.000Z</td>\n",
|
215 |
+
" <td>TSLA</td>\n",
|
216 |
+
" <td>0.119444</td>\n",
|
217 |
+
" </tr>\n",
|
218 |
+
" <tr>\n",
|
219 |
+
" <th>382</th>\n",
|
220 |
+
" <td>2023-10-27T00:00:00.000Z</td>\n",
|
221 |
+
" <td>TSLA</td>\n",
|
222 |
+
" <td>0.164868</td>\n",
|
223 |
+
" </tr>\n",
|
224 |
+
" </tbody>\n",
|
225 |
+
"</table>\n",
|
226 |
+
"<p>383 rows Γ 3 columns</p>\n",
|
227 |
+
"</div>"
|
228 |
+
],
|
229 |
+
"text/plain": [
|
230 |
+
" date ticker sentiment\n",
|
231 |
+
"0 2022-12-14T00:00:00.000Z TSLA 0.102207\n",
|
232 |
+
"1 2023-02-21T00:00:00.000Z TSLA 0.155833\n",
|
233 |
+
"2 2023-08-17T00:00:00.000Z TSLA 0.024046\n",
|
234 |
+
"3 2022-09-16T00:00:00.000Z TSLA 0.087306\n",
|
235 |
+
"4 2023-08-28T00:00:00.000Z TSLA 0.024046\n",
|
236 |
+
".. ... ... ...\n",
|
237 |
+
"378 2023-02-10T00:00:00.000Z TSLA 0.155833\n",
|
238 |
+
"379 2023-05-08T00:00:00.000Z TSLA 0.141296\n",
|
239 |
+
"380 2022-09-08T00:00:00.000Z TSLA 0.087306\n",
|
240 |
+
"381 2023-07-06T00:00:00.000Z TSLA 0.119444\n",
|
241 |
+
"382 2023-10-27T00:00:00.000Z TSLA 0.164868\n",
|
242 |
+
"\n",
|
243 |
+
"[383 rows x 3 columns]"
|
244 |
+
]
|
245 |
+
},
|
246 |
+
"execution_count": 7,
|
247 |
+
"metadata": {},
|
248 |
+
"output_type": "execute_result"
|
249 |
+
}
|
250 |
+
],
|
251 |
+
"source": [
|
252 |
+
"#Inspecting X_train\n",
|
253 |
+
"X_train"
|
254 |
+
]
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"cell_type": "code",
|
258 |
+
"execution_count": 8,
|
259 |
+
"metadata": {},
|
260 |
+
"outputs": [],
|
261 |
+
"source": [
|
262 |
+
"#Converting date into datetime\n",
|
263 |
+
"X_train['date'] = pd.to_datetime(X_train['date']).dt.date\n",
|
264 |
+
"X_test['date'] = pd.to_datetime(X_test['date']).dt.date\n",
|
265 |
+
"X_train['date'] = pd.to_datetime(X_train['date'])\n",
|
266 |
+
"X_test['date'] = pd.to_datetime(X_test['date'])"
|
267 |
+
]
|
268 |
+
},
|
269 |
+
{
|
270 |
+
"cell_type": "code",
|
271 |
+
"execution_count": 9,
|
272 |
+
"metadata": {},
|
273 |
+
"outputs": [
|
274 |
+
{
|
275 |
+
"data": {
|
276 |
+
"text/html": [
|
277 |
+
"<div>\n",
|
278 |
+
"<style scoped>\n",
|
279 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
280 |
+
" vertical-align: middle;\n",
|
281 |
+
" }\n",
|
282 |
+
"\n",
|
283 |
+
" .dataframe tbody tr th {\n",
|
284 |
+
" vertical-align: top;\n",
|
285 |
+
" }\n",
|
286 |
+
"\n",
|
287 |
+
" .dataframe thead th {\n",
|
288 |
+
" text-align: right;\n",
|
289 |
+
" }\n",
|
290 |
+
"</style>\n",
|
291 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
292 |
+
" <thead>\n",
|
293 |
+
" <tr style=\"text-align: right;\">\n",
|
294 |
+
" <th></th>\n",
|
295 |
+
" <th>date</th>\n",
|
296 |
+
" <th>ticker</th>\n",
|
297 |
+
" <th>sentiment</th>\n",
|
298 |
+
" </tr>\n",
|
299 |
+
" </thead>\n",
|
300 |
+
" <tbody>\n",
|
301 |
+
" <tr>\n",
|
302 |
+
" <th>0</th>\n",
|
303 |
+
" <td>2022-12-14</td>\n",
|
304 |
+
" <td>TSLA</td>\n",
|
305 |
+
" <td>0.102207</td>\n",
|
306 |
+
" </tr>\n",
|
307 |
+
" <tr>\n",
|
308 |
+
" <th>1</th>\n",
|
309 |
+
" <td>2023-02-21</td>\n",
|
310 |
+
" <td>TSLA</td>\n",
|
311 |
+
" <td>0.155833</td>\n",
|
312 |
+
" </tr>\n",
|
313 |
+
" <tr>\n",
|
314 |
+
" <th>2</th>\n",
|
315 |
+
" <td>2023-08-17</td>\n",
|
316 |
+
" <td>TSLA</td>\n",
|
317 |
+
" <td>0.024046</td>\n",
|
318 |
+
" </tr>\n",
|
319 |
+
" <tr>\n",
|
320 |
+
" <th>3</th>\n",
|
321 |
+
" <td>2022-09-16</td>\n",
|
322 |
+
" <td>TSLA</td>\n",
|
323 |
+
" <td>0.087306</td>\n",
|
324 |
+
" </tr>\n",
|
325 |
+
" <tr>\n",
|
326 |
+
" <th>4</th>\n",
|
327 |
+
" <td>2023-08-28</td>\n",
|
328 |
+
" <td>TSLA</td>\n",
|
329 |
+
" <td>0.024046</td>\n",
|
330 |
+
" </tr>\n",
|
331 |
+
" </tbody>\n",
|
332 |
+
"</table>\n",
|
333 |
+
"</div>"
|
334 |
+
],
|
335 |
+
"text/plain": [
|
336 |
+
" date ticker sentiment\n",
|
337 |
+
"0 2022-12-14 TSLA 0.102207\n",
|
338 |
+
"1 2023-02-21 TSLA 0.155833\n",
|
339 |
+
"2 2023-08-17 TSLA 0.024046\n",
|
340 |
+
"3 2022-09-16 TSLA 0.087306\n",
|
341 |
+
"4 2023-08-28 TSLA 0.024046"
|
342 |
+
]
|
343 |
+
},
|
344 |
+
"execution_count": 9,
|
345 |
+
"metadata": {},
|
346 |
+
"output_type": "execute_result"
|
347 |
+
}
|
348 |
+
],
|
349 |
+
"source": [
|
350 |
+
"X_train.head()"
|
351 |
+
]
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"cell_type": "code",
|
355 |
+
"execution_count": 10,
|
356 |
+
"metadata": {},
|
357 |
+
"outputs": [],
|
358 |
+
"source": [
|
359 |
+
"# Extracting the 'ticker' column\n",
|
360 |
+
"tickers = X_train[['ticker']]\n",
|
361 |
+
"\n",
|
362 |
+
"# Initializing OneHotEncoder\n",
|
363 |
+
"encoder = OneHotEncoder()\n",
|
364 |
+
"\n",
|
365 |
+
"# Fitting and transforming the 'ticker' column\n",
|
366 |
+
"ticker_encoded = encoder.fit_transform(tickers)\n",
|
367 |
+
"\n",
|
368 |
+
"# Converting the encoded column into a DataFrame\n",
|
369 |
+
"ticker_encoded_df = pd.DataFrame(ticker_encoded.toarray(), columns=encoder.get_feature_names_out(['ticker']))\n",
|
370 |
+
"\n",
|
371 |
+
"# Concatenating the encoded DataFrame with the original DataFrame\n",
|
372 |
+
"X_train = pd.concat([X_train, ticker_encoded_df], axis=1)\n",
|
373 |
+
"\n",
|
374 |
+
"# Dropping the original 'ticker' column\n",
|
375 |
+
"X_train.drop('ticker', axis=1, inplace=True)"
|
376 |
+
]
|
377 |
+
},
|
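With a TSLA-only dataset the encoder yields a single constant ticker_TSLA column, and fitting a second encoder on X_test below only works because both splits contain the same one category. A sketch of the more robust pattern, fitting once and reusing the encoder, shown as it would look before the drop above (handle_unknown is an assumption, not in the original):

# Fit on the training tickers only; reuse the fitted encoder for the test split
encoder = OneHotEncoder(handle_unknown='ignore')
train_ohe = encoder.fit_transform(X_train[['ticker']]).toarray()
test_ohe = encoder.transform(X_test[['ticker']]).toarray()
ohe_cols = encoder.get_feature_names_out(['ticker'])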
378 |
+
{
|
379 |
+
"cell_type": "code",
|
380 |
+
"execution_count": 11,
|
381 |
+
"metadata": {},
|
382 |
+
"outputs": [
|
383 |
+
{
|
384 |
+
"data": {
|
385 |
+
"text/html": [
|
386 |
+
"<div>\n",
|
387 |
+
"<style scoped>\n",
|
388 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
389 |
+
" vertical-align: middle;\n",
|
390 |
+
" }\n",
|
391 |
+
"\n",
|
392 |
+
" .dataframe tbody tr th {\n",
|
393 |
+
" vertical-align: top;\n",
|
394 |
+
" }\n",
|
395 |
+
"\n",
|
396 |
+
" .dataframe thead th {\n",
|
397 |
+
" text-align: right;\n",
|
398 |
+
" }\n",
|
399 |
+
"</style>\n",
|
400 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
401 |
+
" <thead>\n",
|
402 |
+
" <tr style=\"text-align: right;\">\n",
|
403 |
+
" <th></th>\n",
|
404 |
+
" <th>date</th>\n",
|
405 |
+
" <th>sentiment</th>\n",
|
406 |
+
" <th>ticker_TSLA</th>\n",
|
407 |
+
" </tr>\n",
|
408 |
+
" </thead>\n",
|
409 |
+
" <tbody>\n",
|
410 |
+
" <tr>\n",
|
411 |
+
" <th>0</th>\n",
|
412 |
+
" <td>2022-12-14</td>\n",
|
413 |
+
" <td>0.102207</td>\n",
|
414 |
+
" <td>1.0</td>\n",
|
415 |
+
" </tr>\n",
|
416 |
+
" <tr>\n",
|
417 |
+
" <th>1</th>\n",
|
418 |
+
" <td>2023-02-21</td>\n",
|
419 |
+
" <td>0.155833</td>\n",
|
420 |
+
" <td>1.0</td>\n",
|
421 |
+
" </tr>\n",
|
422 |
+
" <tr>\n",
|
423 |
+
" <th>2</th>\n",
|
424 |
+
" <td>2023-08-17</td>\n",
|
425 |
+
" <td>0.024046</td>\n",
|
426 |
+
" <td>1.0</td>\n",
|
427 |
+
" </tr>\n",
|
428 |
+
" <tr>\n",
|
429 |
+
" <th>3</th>\n",
|
430 |
+
" <td>2022-09-16</td>\n",
|
431 |
+
" <td>0.087306</td>\n",
|
432 |
+
" <td>1.0</td>\n",
|
433 |
+
" </tr>\n",
|
434 |
+
" <tr>\n",
|
435 |
+
" <th>4</th>\n",
|
436 |
+
" <td>2023-08-28</td>\n",
|
437 |
+
" <td>0.024046</td>\n",
|
438 |
+
" <td>1.0</td>\n",
|
439 |
+
" </tr>\n",
|
440 |
+
" </tbody>\n",
|
441 |
+
"</table>\n",
|
442 |
+
"</div>"
|
443 |
+
],
|
444 |
+
"text/plain": [
|
445 |
+
" date sentiment ticker_TSLA\n",
|
446 |
+
"0 2022-12-14 0.102207 1.0\n",
|
447 |
+
"1 2023-02-21 0.155833 1.0\n",
|
448 |
+
"2 2023-08-17 0.024046 1.0\n",
|
449 |
+
"3 2022-09-16 0.087306 1.0\n",
|
450 |
+
"4 2023-08-28 0.024046 1.0"
|
451 |
+
]
|
452 |
+
},
|
453 |
+
"execution_count": 11,
|
454 |
+
"metadata": {},
|
455 |
+
"output_type": "execute_result"
|
456 |
+
}
|
457 |
+
],
|
458 |
+
"source": [
|
459 |
+
"#Inspecting X train after onehotencoding 'Ticker'\n",
|
460 |
+
"X_train.head()"
|
461 |
+
]
|
462 |
+
},
|
463 |
+
{
|
464 |
+
"cell_type": "code",
|
465 |
+
"execution_count": 12,
|
466 |
+
"metadata": {},
|
467 |
+
"outputs": [],
|
468 |
+
"source": [
|
469 |
+
"#Doing the same for X test as done to X train\n",
|
470 |
+
"\n",
|
471 |
+
"tickers = X_test[['ticker']]\n",
|
472 |
+
"\n",
|
473 |
+
"# Initializing OneHotEncoder\n",
|
474 |
+
"encoder = OneHotEncoder()\n",
|
475 |
+
"\n",
|
476 |
+
"# Fitting and transforming the 'ticker' column\n",
|
477 |
+
"ticker_encoded_test = encoder.fit_transform(tickers)\n",
|
478 |
+
"\n",
|
479 |
+
"# Converting the encoded column into a DataFrame\n",
|
480 |
+
"ticker_encoded_df_test = pd.DataFrame(ticker_encoded_test.toarray(), columns=encoder.get_feature_names_out(['ticker']))\n",
|
481 |
+
"\n",
|
482 |
+
"# Concatenating the encoded DataFrame with the original DataFrame\n",
|
483 |
+
"X_test = pd.concat([X_test, ticker_encoded_df_test], axis=1)\n",
|
484 |
+
"\n",
|
485 |
+
"# Dropping the original 'ticker' column\n",
|
486 |
+
"X_test.drop('ticker', axis=1, inplace=True)"
|
487 |
+
]
|
488 |
+
},
|
489 |
+
{
|
490 |
+
"cell_type": "code",
|
491 |
+
"execution_count": 13,
|
492 |
+
"metadata": {},
|
493 |
+
"outputs": [],
|
494 |
+
"source": [
|
495 |
+
"#Loading in MinMaxScaler to be used on the target variable 'open'\n",
|
496 |
+
"scaler = MinMaxScaler()\n",
|
497 |
+
"\n",
|
498 |
+
"# Fitting and transforming the 'open' column\n",
|
499 |
+
"y_train['open_scaled'] = scaler.fit_transform(y_train[['open']])\n",
|
500 |
+
"y_train.drop('open', axis=1, inplace=True)"
|
501 |
+
]
|
502 |
+
},
|
503 |
+
{
|
504 |
+
"cell_type": "code",
|
505 |
+
"execution_count": 14,
|
506 |
+
"metadata": {},
|
507 |
+
"outputs": [],
|
508 |
+
"source": [
|
509 |
+
"#Doing the same to y_test as done to y_train \n",
|
510 |
+
"y_test['open_scaled'] = scaler.fit_transform(y_test[['open']])\n",
|
511 |
+
"y_test.drop('open', axis=1, inplace=True)"
|
512 |
+
]
|
513 |
+
},
|
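Note that the cell above re-fits the scaler on y_test, so the train and test targets end up on different scales and the RMSE computed later mixes the two. A sketch of the conventional approach, fitting on the training target only and reusing that fit:

# Fit the scaler on the training target, then apply the same transform to the test target
scaler = MinMaxScaler()
y_train['open_scaled'] = scaler.fit_transform(y_train[['open']])
y_test['open_scaled'] = scaler.transform(y_test[['open']])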
514 |
+
{
|
515 |
+
"cell_type": "code",
|
516 |
+
"execution_count": 15,
|
517 |
+
"metadata": {},
|
518 |
+
"outputs": [],
|
519 |
+
"source": [
|
520 |
+
"#Defining the function for the LSTM model\n",
|
521 |
+
"def create_model(input_shape,\n",
|
522 |
+
" LSTM_filters=64,\n",
|
523 |
+
" dropout=0.1,\n",
|
524 |
+
" recurrent_dropout=0.1,\n",
|
525 |
+
" dense_dropout=0.5,\n",
|
526 |
+
" activation='relu',\n",
|
527 |
+
" depth=1):\n",
|
528 |
+
"\n",
|
529 |
+
" model = Sequential()\n",
|
530 |
+
"\n",
|
531 |
+
" # Input layer\n",
|
532 |
+
" model.add(Input(shape=input_shape))\n",
|
533 |
+
"\n",
|
534 |
+
" if depth > 1:\n",
|
535 |
+
" for i in range(1, depth):\n",
|
536 |
+
" # Recurrent layer\n",
|
537 |
+
" model.add(LSTM(LSTM_filters, return_sequences=True, dropout=dropout, recurrent_dropout=recurrent_dropout))\n",
|
538 |
+
"\n",
|
539 |
+
" # Recurrent layer\n",
|
540 |
+
" model.add(LSTM(LSTM_filters, return_sequences=False, dropout=dropout, recurrent_dropout=recurrent_dropout))\n",
|
541 |
+
"\n",
|
542 |
+
" # Fully connected layer\n",
|
543 |
+
" if activation == 'relu':\n",
|
544 |
+
" model.add(Dense(LSTM_filters, activation='relu'))\n",
|
545 |
+
" elif activation == 'leaky_relu':\n",
|
546 |
+
" model.add(Dense(LSTM_filters))\n",
|
547 |
+
" model.add(tf.keras.layers.LeakyReLU(alpha=0.1))\n",
|
548 |
+
"\n",
|
549 |
+
" # Dropout for regularization\n",
|
550 |
+
" model.add(Dropout(dense_dropout))\n",
|
551 |
+
"\n",
|
552 |
+
" # Output layer for predicting one day forward\n",
|
553 |
+
" model.add(Dense(1, activation='linear'))\n",
|
554 |
+
"\n",
|
555 |
+
" # Compile the model\n",
|
556 |
+
" model.compile(optimizer='adam', loss='mse')\n",
|
557 |
+
"\n",
|
558 |
+
" return model"
|
559 |
+
]
|
560 |
+
},
|
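One caveat in create_model: the leaky_relu branch calls tf.keras.layers.LeakyReLU, but no tf alias is imported in this notebook, so activation='leaky_relu' would raise a NameError. A minimal sketch of the fixed branch using a direct layer import, keeping the original alpha argument:

from tensorflow.keras.layers import Input, Dense, LeakyReLU
from tensorflow.keras.models import Sequential

# Dense followed by a standalone LeakyReLU layer, as the leaky_relu branch intends
m = Sequential([Input(shape=(5,)), Dense(64), LeakyReLU(alpha=0.1)])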
561 |
+
{
|
562 |
+
"cell_type": "code",
|
563 |
+
"execution_count": 16,
|
564 |
+
"metadata": {},
|
565 |
+
"outputs": [
|
566 |
+
{
|
567 |
+
"name": "stdout",
|
568 |
+
"output_type": "stream",
|
569 |
+
"text": [
|
570 |
+
"2024-05-07 10:28:33,332 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
|
571 |
+
"See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
|
572 |
+
"\n"
|
573 |
+
]
|
574 |
+
}
|
575 |
+
],
|
576 |
+
"source": [
|
577 |
+
"# As X_train['date'] column exists and is in datetime format, we're converting it\n",
|
578 |
+
"X_train['year'] = X_train['date'].dt.year\n",
|
579 |
+
"X_train['month'] = X_train['date'].dt.month\n",
|
580 |
+
"X_train['day'] = X_train['date'].dt.day\n",
|
581 |
+
"\n",
|
582 |
+
"# Dropping the original date column\n",
|
583 |
+
"X_train.drop(columns=['date'], inplace=True)\n",
|
584 |
+
"\n",
|
585 |
+
"# Converting dataframe to numpy array\n",
|
586 |
+
"X_train_array = X_train.to_numpy()\n",
|
587 |
+
"\n",
|
588 |
+
"# Reshaping the array to have a shape suitable for LSTM\n",
|
589 |
+
"X_train_array = np.expand_dims(X_train_array, axis=1)"
|
590 |
+
]
|
591 |
+
},
|
592 |
+
{
|
593 |
+
"cell_type": "code",
|
594 |
+
"execution_count": 17,
|
595 |
+
"metadata": {},
|
596 |
+
"outputs": [],
|
597 |
+
"source": [
|
598 |
+
"# Convert DataFrame to numpy array\n",
|
599 |
+
"X_train_array = X_train.values\n",
|
600 |
+
"\n",
|
601 |
+
"# Reshaping X_train_array to add a time step dimension\n",
|
602 |
+
"X_train_reshaped = X_train_array.reshape(X_train_array.shape[0], 1, X_train_array.shape[1])\n",
|
603 |
+
"\n",
|
604 |
+
"# Assuming X_train_reshaped shape is now (374, 1, 5)\n",
|
605 |
+
"input_shape = X_train_reshaped.shape[1:]\n",
|
606 |
+
"\n",
|
607 |
+
"# Create the model\n",
|
608 |
+
"model = create_model(input_shape=input_shape)"
|
609 |
+
]
|
610 |
+
},
|
611 |
+
{
|
612 |
+
"cell_type": "code",
|
613 |
+
"execution_count": 18,
|
614 |
+
"metadata": {},
|
615 |
+
"outputs": [
|
616 |
+
{
|
617 |
+
"name": "stdout",
|
618 |
+
"output_type": "stream",
|
619 |
+
"text": [
|
620 |
+
"\u001b[1m12/12\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 5ms/step - loss: 0.5131\n"
|
621 |
+
]
|
622 |
+
},
|
623 |
+
{
|
624 |
+
"data": {
|
625 |
+
"text/plain": [
|
626 |
+
"<keras.src.callbacks.history.History at 0x1c3aa79ff50>"
|
627 |
+
]
|
628 |
+
},
|
629 |
+
"execution_count": 18,
|
630 |
+
"metadata": {},
|
631 |
+
"output_type": "execute_result"
|
632 |
+
}
|
633 |
+
],
|
634 |
+
"source": [
|
635 |
+
"#Fitting the model on the training dataset\n",
|
636 |
+
"model.fit(X_train_reshaped, y_train)"
|
637 |
+
]
|
638 |
+
},
|
639 |
+
{
|
640 |
+
"cell_type": "code",
|
641 |
+
"execution_count": 19,
|
642 |
+
"metadata": {},
|
643 |
+
"outputs": [
|
644 |
+
{
|
645 |
+
"name": "stdout",
|
646 |
+
"output_type": "stream",
|
647 |
+
"text": [
|
648 |
+
"2024-05-07 10:28:39,020 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
|
649 |
+
"See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
|
650 |
+
"\n"
|
651 |
+
]
|
652 |
+
}
|
653 |
+
],
|
654 |
+
"source": [
|
655 |
+
"# As X_test['date'] column exists and is in datetime format, we're converting it\n",
|
656 |
+
"X_test['year'] = X_test['date'].dt.year\n",
|
657 |
+
"X_test['month'] = X_test['date'].dt.month\n",
|
658 |
+
"X_test['day'] = X_test['date'].dt.day\n",
|
659 |
+
"\n",
|
660 |
+
"# Dropping the original date column\n",
|
661 |
+
"X_test.drop(columns=['date'], inplace=True)\n",
|
662 |
+
"\n",
|
663 |
+
"# Converting dataframe to numpy array\n",
|
664 |
+
"X_test_array = X_test.to_numpy()\n",
|
665 |
+
"\n",
|
666 |
+
"# Reshape the array to have a shape suitable for LSTM\n",
|
667 |
+
"X_test_array = np.expand_dims(X_test_array, axis=1)"
|
668 |
+
]
|
669 |
+
},
|
670 |
+
{
|
671 |
+
"cell_type": "code",
|
672 |
+
"execution_count": 20,
|
673 |
+
"metadata": {},
|
674 |
+
"outputs": [
|
675 |
+
{
|
676 |
+
"name": "stdout",
|
677 |
+
"output_type": "stream",
|
678 |
+
"text": [
|
679 |
+
"\u001b[1m3/3\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 307ms/step\n"
|
680 |
+
]
|
681 |
+
}
|
682 |
+
],
|
683 |
+
"source": [
|
684 |
+
"#Predicting y_pred with X_test\n",
|
685 |
+
"y_pred = model.predict(X_test_array)"
|
686 |
+
]
|
687 |
+
},
|
688 |
+
{
|
689 |
+
"cell_type": "code",
|
690 |
+
"execution_count": 21,
|
691 |
+
"metadata": {},
|
692 |
+
"outputs": [
|
693 |
+
{
|
694 |
+
"name": "stdout",
|
695 |
+
"output_type": "stream",
|
696 |
+
"text": [
|
697 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
698 |
+
]
|
699 |
+
}
|
700 |
+
],
|
701 |
+
"source": [
|
702 |
+
"#Conneting to hopsworks model registry\n",
|
703 |
+
"mr = project.get_model_registry()"
|
704 |
+
]
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"cell_type": "code",
|
708 |
+
"execution_count": 22,
|
709 |
+
"metadata": {},
|
710 |
+
"outputs": [
|
711 |
+
{
|
712 |
+
"data": {
|
713 |
+
"text/plain": [
|
714 |
+
"{'RMSE': 0.3981142064349763}"
|
715 |
+
]
|
716 |
+
},
|
717 |
+
"execution_count": 22,
|
718 |
+
"metadata": {},
|
719 |
+
"output_type": "execute_result"
|
720 |
+
}
|
721 |
+
],
|
722 |
+
"source": [
|
723 |
+
"# Compute RMSE metric for filling the model\n",
|
724 |
+
"rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n",
|
725 |
+
"rmse_metrics = {\"RMSE\": rmse}\n",
|
726 |
+
"rmse_metrics"
|
727 |
+
]
|
728 |
+
},
|
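Since the targets were MinMax-scaled, this RMSE of ~0.398 is in scaled units rather than dollars. A sketch of reporting the error back on the price scale, assuming a scaler fitted on the training target as in the note above:

# Invert the MinMax scaling so the error is expressed in 'open' price units
y_test_price = scaler.inverse_transform(y_test[['open_scaled']])
y_pred_price = scaler.inverse_transform(y_pred)
rmse_usd = np.sqrt(mean_squared_error(y_test_price, y_pred_price))
print(rmse_usd)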
729 |
+
{
|
730 |
+
"cell_type": "code",
|
731 |
+
"execution_count": 23,
|
732 |
+
"metadata": {},
|
733 |
+
"outputs": [],
|
734 |
+
"source": [
|
735 |
+
"#Setting up the model schema\n",
|
736 |
+
"input_schema = Schema(X_train)\n",
|
737 |
+
"output_schema = Schema(y_train)\n",
|
738 |
+
"model_schema = ModelSchema(input_schema, output_schema)"
|
739 |
+
]
|
740 |
+
},
|
741 |
+
{
|
742 |
+
"cell_type": "code",
|
743 |
+
"execution_count": 24,
|
744 |
+
"metadata": {},
|
745 |
+
"outputs": [],
|
746 |
+
"source": [
|
747 |
+
"#Creating a file colled 'stock_model'\n",
|
748 |
+
"model_dir=\"stock_model\"\n",
|
749 |
+
"if os.path.isdir(model_dir) == False:\n",
|
750 |
+
" os.mkdir(model_dir)"
|
751 |
+
]
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"cell_type": "code",
|
755 |
+
"execution_count": 25,
|
756 |
+
"metadata": {},
|
757 |
+
"outputs": [
|
758 |
+
{
|
759 |
+
"data": {
|
760 |
+
"application/vnd.jupyter.widget-view+json": {
|
761 |
+
"model_id": "a6169babeb154f54bdbb9b0b490333ab",
|
762 |
+
"version_major": 2,
|
763 |
+
"version_minor": 0
|
764 |
+
},
|
765 |
+
"text/plain": [
|
766 |
+
" 0%| | 0/6 [00:00<?, ?it/s]"
|
767 |
+
]
|
768 |
+
},
|
769 |
+
"metadata": {},
|
770 |
+
"output_type": "display_data"
|
771 |
+
},
|
772 |
+
{
|
773 |
+
"data": {
|
774 |
+
"application/vnd.jupyter.widget-view+json": {
|
775 |
+
"model_id": "f5749cebd1fe422dbeaba0ec2718a3f9",
|
776 |
+
"version_major": 2,
|
777 |
+
"version_minor": 0
|
778 |
+
},
|
779 |
+
"text/plain": [
|
780 |
+
"Uploading: 0.000%| | 0/561 elapsed<00:00 remaining<?"
|
781 |
+
]
|
782 |
+
},
|
783 |
+
"metadata": {},
|
784 |
+
"output_type": "display_data"
|
785 |
+
},
|
786 |
+
{
|
787 |
+
"name": "stdout",
|
788 |
+
"output_type": "stream",
|
789 |
+
"text": [
|
790 |
+
"Model created, explore it at https://c.app.hopsworks.ai:443/p/693399/models/stock_pred_model/6\n"
|
791 |
+
]
|
792 |
+
},
|
793 |
+
{
|
794 |
+
"data": {
|
795 |
+
"text/plain": [
|
796 |
+
"Model(name: 'stock_pred_model', version: 6)"
|
797 |
+
]
|
798 |
+
},
|
799 |
+
"execution_count": 25,
|
800 |
+
"metadata": {},
|
801 |
+
"output_type": "execute_result"
|
802 |
+
}
|
803 |
+
],
|
804 |
+
"source": [
|
805 |
+
"#Saving the model to hopsworks model registry\n",
|
806 |
+
"stock_pred_model = mr.tensorflow.create_model(\n",
|
807 |
+
" name=\"stock_pred_model\",\n",
|
808 |
+
" metrics= rmse_metrics,\n",
|
809 |
+
" model_schema=model_schema,\n",
|
810 |
+
" description=\"Stock Market TSLA Predictor from News Sentiment\",\n",
|
811 |
+
" )\n",
|
812 |
+
"\n",
|
813 |
+
"stock_pred_model.save(model_dir)"
|
814 |
+
]
|
815 |
+
}
|
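The trained network itself is never written into model_dir before stock_pred_model.save(model_dir) uploads the directory, which is what later makes the inference notebook fail to find a model file. A sketch of persisting the Keras model first; the file name is an assumption, not from the original:

# Write the trained model into the directory that gets uploaded to the registry
model.save(os.path.join(model_dir, "stock_model.keras"))
stock_pred_model.save(model_dir)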
816 |
+
],
|
817 |
+
"metadata": {
|
818 |
+
"kernelspec": {
|
819 |
+
"display_name": "base",
|
820 |
+
"language": "python",
|
821 |
+
"name": "python3"
|
822 |
+
},
|
823 |
+
"language_info": {
|
824 |
+
"codemirror_mode": {
|
825 |
+
"name": "ipython",
|
826 |
+
"version": 3
|
827 |
+
},
|
828 |
+
"file_extension": ".py",
|
829 |
+
"mimetype": "text/x-python",
|
830 |
+
"name": "python",
|
831 |
+
"nbconvert_exporter": "python",
|
832 |
+
"pygments_lexer": "ipython3",
|
833 |
+
"version": "3.11.9"
|
834 |
+
},
|
835 |
+
"orig_nbformat": 4
|
836 |
+
},
|
837 |
+
"nbformat": 4,
|
838 |
+
"nbformat_minor": 2
|
839 |
+
}
|
Stocks news prediction/Notebooks/8_inference_pipeline.ipynb
ADDED
@@ -0,0 +1,315 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
13 |
+
"\n",
|
14 |
+
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/549016\n",
|
15 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
16 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
17 |
+
]
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"name": "stderr",
|
21 |
+
"output_type": "stream",
|
22 |
+
"text": [
|
23 |
+
"../src/arrow/status.cc:137: DoAction result was not fully consumed: Cancelled: Flight cancelled call, with message: CANCELLED. Detail: Cancelled\n"
|
24 |
+
]
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"name": "stdout",
|
28 |
+
"output_type": "stream",
|
29 |
+
"text": [
|
30 |
+
"Training dataset job started successfully, you can follow the progress at \n",
|
31 |
+
"https://c.app.hopsworks.ai/p/549016/jobs/named/tesla_stocks_fv_1_create_fv_td_07052024090051/executions\n",
|
32 |
+
"2024-05-07 11:02:21,906 WARNING: VersionWarning: Incremented version to `1`.\n",
|
33 |
+
"\n",
|
34 |
+
"\u001b[1m12/12\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 3ms/step - loss: 0.5555\n",
|
35 |
+
"\u001b[1m3/3\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 61ms/step\n",
|
36 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
37 |
+
]
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"data": {
|
41 |
+
"application/vnd.jupyter.widget-view+json": {
|
42 |
+
"model_id": "1dd33e12e80548c99f5a605b28f82196",
|
43 |
+
"version_major": 2,
|
44 |
+
"version_minor": 0
|
45 |
+
},
|
46 |
+
"text/plain": [
|
47 |
+
" 0%| | 0/6 [00:00<?, ?it/s]"
|
48 |
+
]
|
49 |
+
},
|
50 |
+
"metadata": {},
|
51 |
+
"output_type": "display_data"
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"data": {
|
55 |
+
"application/vnd.jupyter.widget-view+json": {
|
56 |
+
"model_id": "b636479e09e94fb2a0c5736c2368aec4",
|
57 |
+
"version_major": 2,
|
58 |
+
"version_minor": 0
|
59 |
+
},
|
60 |
+
"text/plain": [
|
61 |
+
"Uploading: 0.000%| | 0/528 elapsed<00:00 remaining<?"
|
62 |
+
]
|
63 |
+
},
|
64 |
+
"metadata": {},
|
65 |
+
"output_type": "display_data"
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"name": "stdout",
|
69 |
+
"output_type": "stream",
|
70 |
+
"text": [
|
71 |
+
"Model created, explore it at https://c.app.hopsworks.ai:443/p/549016/models/stock_pred_model/7\n"
|
72 |
+
]
|
73 |
+
}
|
74 |
+
],
|
75 |
+
"source": [
|
76 |
+
"import pandas as pd \n",
|
77 |
+
"import hopsworks \n",
|
78 |
+
"from datetime import datetime, timedelta\n",
|
79 |
+
"from SML import training_pipeline"
|
80 |
+
]
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"cell_type": "code",
|
84 |
+
"execution_count": 2,
|
85 |
+
"metadata": {},
|
86 |
+
"outputs": [
|
87 |
+
{
|
88 |
+
"name": "stdout",
|
89 |
+
"output_type": "stream",
|
90 |
+
"text": [
|
91 |
+
"Connection closed.\n",
|
92 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
93 |
+
"\n",
|
94 |
+
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/549016\n",
|
95 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
96 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
97 |
+
]
|
98 |
+
}
|
99 |
+
],
|
100 |
+
"source": [
|
101 |
+
"project = hopsworks.login()\n",
|
102 |
+
"fs= project.get_feature_store()\n",
|
103 |
+
"mr = project.get_model_registry() "
|
104 |
+
]
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"cell_type": "code",
|
108 |
+
"execution_count": 3,
|
109 |
+
"metadata": {},
|
110 |
+
"outputs": [
|
111 |
+
{
|
112 |
+
"name": "stdout",
|
113 |
+
"output_type": "stream",
|
114 |
+
"text": [
|
115 |
+
"2024-05-06\n"
|
116 |
+
]
|
117 |
+
}
|
118 |
+
],
|
119 |
+
"source": [
|
120 |
+
"start_date = datetime.now() - timedelta(hours=24)\n",
|
121 |
+
"print(start_date.strftime(\"%Y-%m-%d\"))"
|
122 |
+
]
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"cell_type": "code",
|
126 |
+
"execution_count": 12,
|
127 |
+
"metadata": {},
|
128 |
+
"outputs": [
|
129 |
+
{
|
130 |
+
"name": "stdout",
|
131 |
+
"output_type": "stream",
|
132 |
+
"text": [
|
133 |
+
"2024-05-07\n"
|
134 |
+
]
|
135 |
+
}
|
136 |
+
],
|
137 |
+
"source": [
|
138 |
+
"end_date = datetime.now().strftime(\"%Y-%m-%d\")\n",
|
139 |
+
"print(end_date)"
|
140 |
+
]
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"cell_type": "code",
|
144 |
+
"execution_count": 13,
|
145 |
+
"metadata": {},
|
146 |
+
"outputs": [],
|
147 |
+
"source": [
|
148 |
+
"feature_view = fs.get_feature_view('tesla_stocks_fv', 1)\n",
|
149 |
+
"feature_view.init_batch_scoring(training_dataset_version=1)"
|
150 |
+
]
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"cell_type": "code",
|
154 |
+
"execution_count": 14,
|
155 |
+
"metadata": {},
|
156 |
+
"outputs": [
|
157 |
+
{
|
158 |
+
"name": "stdout",
|
159 |
+
"output_type": "stream",
|
160 |
+
"text": [
|
161 |
+
"WITH right_fg0 AS (SELECT *\n",
|
162 |
+
"FROM (SELECT `fg1`.`date` `date`, `fg1`.`ticker` `ticker`, `fg1`.`ticker` `join_pk_ticker`, `fg1`.`date` `join_evt_date`, `fg0`.`sentiment` `sentiment`, RANK() OVER (PARTITION BY `fg1`.`ticker`, `fg1`.`date` ORDER BY `fg0`.`date` DESC) pit_rank_hopsworks\n",
|
163 |
+
"FROM `mtzeve_featurestore`.`tesla_stock_1` `fg1`\n",
|
164 |
+
"INNER JOIN `mtzeve_featurestore`.`news_sentiment_updated_1` `fg0` ON `fg1`.`ticker` = `fg0`.`ticker` AND `fg1`.`date` >= `fg0`.`date`) NA\n",
|
165 |
+
"WHERE `pit_rank_hopsworks` = 1) (SELECT `right_fg0`.`date` `date`, `right_fg0`.`ticker` `ticker`, `right_fg0`.`sentiment` `sentiment`\n",
|
166 |
+
"FROM right_fg0)\n"
|
167 |
+
]
|
168 |
+
}
|
169 |
+
],
|
170 |
+
"source": [
|
171 |
+
"print(feature_view.get_batch_query())"
|
172 |
+
]
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"cell_type": "code",
|
176 |
+
"execution_count": 16,
|
177 |
+
"metadata": {},
|
178 |
+
"outputs": [
|
179 |
+
{
|
180 |
+
"name": "stdout",
|
181 |
+
"output_type": "stream",
|
182 |
+
"text": [
|
183 |
+
"Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.11s) \n"
|
184 |
+
]
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"data": {
|
188 |
+
"text/html": [
|
189 |
+
"<div>\n",
|
190 |
+
"<style scoped>\n",
|
191 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
192 |
+
" vertical-align: middle;\n",
|
193 |
+
" }\n",
|
194 |
+
"\n",
|
195 |
+
" .dataframe tbody tr th {\n",
|
196 |
+
" vertical-align: top;\n",
|
197 |
+
" }\n",
|
198 |
+
"\n",
|
199 |
+
" .dataframe thead th {\n",
|
200 |
+
" text-align: right;\n",
|
201 |
+
" }\n",
|
202 |
+
"</style>\n",
|
203 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
204 |
+
" <thead>\n",
|
205 |
+
" <tr style=\"text-align: right;\">\n",
|
206 |
+
" <th></th>\n",
|
207 |
+
" <th>date</th>\n",
|
208 |
+
" <th>ticker</th>\n",
|
209 |
+
" <th>sentiment</th>\n",
|
210 |
+
" </tr>\n",
|
211 |
+
" </thead>\n",
|
212 |
+
" <tbody>\n",
|
213 |
+
" </tbody>\n",
|
214 |
+
"</table>\n",
|
215 |
+
"</div>"
|
216 |
+
],
|
217 |
+
"text/plain": [
|
218 |
+
"Empty DataFrame\n",
|
219 |
+
"Columns: [date, ticker, sentiment]\n",
|
220 |
+
"Index: []"
|
221 |
+
]
|
222 |
+
},
|
223 |
+
"execution_count": 16,
|
224 |
+
"metadata": {},
|
225 |
+
"output_type": "execute_result"
|
226 |
+
}
|
227 |
+
],
|
228 |
+
"source": [
|
229 |
+
"# we had problems fetching the data from fv with get_batch_data function, tried everything and it just doesnt work \n",
|
230 |
+
"tsla_df_b = feature_view.get_batch_data(start_time = start_date, end_time = end_date)\n",
|
231 |
+
"tsla_df_b"
|
232 |
+
]
|
233 |
+
},
|
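The empty frame is consistent with the point-in-time join printed above: within a 24-hour window there may be no tesla_stock row that also has an earlier news_sentiment row. Two checks worth running, sketched under the assumption that the feature groups do contain recent rows:

# 1) Widen the window to rule out a too-narrow time range
wide_df = feature_view.get_batch_data(
    start_time=datetime.now() - timedelta(days=30),
    end_time=datetime.now(),
)
print(len(wide_df))

# 2) Read the underlying feature group directly to confirm data is landing
tesla_fg = fs.get_feature_group("tesla_stock", version=1)
print(tesla_fg.read().tail())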
234 |
+
{
|
235 |
+
"cell_type": "code",
|
236 |
+
"execution_count": 11,
|
237 |
+
"metadata": {},
|
238 |
+
"outputs": [
|
239 |
+
{
|
240 |
+
"name": "stdout",
|
241 |
+
"output_type": "stream",
|
242 |
+
"text": [
|
243 |
+
"Downloading model artifact (0 dirs, 1 files)... DONE\r"
|
244 |
+
]
|
245 |
+
},
|
246 |
+
{
|
247 |
+
"ename": "FileNotFoundError",
|
248 |
+
"evalue": "[Errno 2] No such file or directory: '/var/folders/ty/fy7wpfqs4c39hnsfl21_rzyc0000gn/T/d6edbe1d-de39-488f-b12c-c0cbfd5ded37/stock_pred_model/7stock_model'",
|
249 |
+
"output_type": "error",
|
250 |
+
"traceback": [
|
251 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
252 |
+
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
|
253 |
+
"\u001b[1;32m/Users/manos/Documents/BDS/Mlops_mod_final/MLops_mod/inference_pipeline.ipynb Cell 8\u001b[0m line \u001b[0;36m5\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_mod_final/MLops_mod/inference_pipeline.ipynb#X11sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m the_model \u001b[39m=\u001b[39m mr\u001b[39m.\u001b[39mget_model(\u001b[39m\"\u001b[39m\u001b[39mstock_pred_model\u001b[39m\u001b[39m\"\u001b[39m, version\u001b[39m=\u001b[39m\u001b[39m7\u001b[39m)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_mod_final/MLops_mod/inference_pipeline.ipynb#X11sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m model_dir \u001b[39m=\u001b[39m the_model\u001b[39m.\u001b[39mdownload()\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_mod_final/MLops_mod/inference_pipeline.ipynb#X11sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m model \u001b[39m=\u001b[39m joblib\u001b[39m.\u001b[39mload(model_dir \u001b[39m+\u001b[39m \u001b[39m'\u001b[39m\u001b[39mstock_model\u001b[39m\u001b[39m'\u001b[39m)\n",
|
254 |
+
"File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/joblib/numpy_pickle.py:650\u001b[0m, in \u001b[0;36mload\u001b[0;34m(filename, mmap_mode)\u001b[0m\n\u001b[1;32m 648\u001b[0m obj \u001b[39m=\u001b[39m _unpickle(fobj)\n\u001b[1;32m 649\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 650\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(filename, \u001b[39m'\u001b[39m\u001b[39mrb\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 651\u001b[0m \u001b[39mwith\u001b[39;00m _read_fileobject(f, filename, mmap_mode) \u001b[39mas\u001b[39;00m fobj:\n\u001b[1;32m 652\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(fobj, \u001b[39mstr\u001b[39m):\n\u001b[1;32m 653\u001b[0m \u001b[39m# if the returned file object is a string, this means we\u001b[39;00m\n\u001b[1;32m 654\u001b[0m \u001b[39m# try to load a pickle file generated with an version of\u001b[39;00m\n\u001b[1;32m 655\u001b[0m \u001b[39m# Joblib so we load it with joblib compatibility function.\u001b[39;00m\n",
|
255 |
+
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/var/folders/ty/fy7wpfqs4c39hnsfl21_rzyc0000gn/T/d6edbe1d-de39-488f-b12c-c0cbfd5ded37/stock_pred_model/7stock_model'"
|
256 |
+
]
|
257 |
+
}
|
258 |
+
],
|
259 |
+
"source": [
|
260 |
+
"import joblib\n",
|
261 |
+
"the_model = mr.get_model(\"stock_pred_model\", version=7)\n",
|
262 |
+
"model_dir = the_model.download()\n",
|
263 |
+
"\n",
|
264 |
+
"model = joblib.load(model_dir + 'stock_model')"
|
265 |
+
]
|
266 |
+
},
|
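The FileNotFoundError above is a path bug: model_dir + 'stock_model' concatenates without a separator, yielding .../7stock_model. And since the registered artifact is a Keras network, keras loading is the safer choice than joblib. A sketch of the corrected load; the .keras file name is an assumption tied to the training-side sketch:

import os
from tensorflow.keras.models import load_model

the_model = mr.get_model("stock_pred_model", version=7)
model_dir = the_model.download()

# Join path components instead of concatenating strings
model = load_model(os.path.join(model_dir, "stock_model.keras"))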
267 |
+
{
|
268 |
+
"cell_type": "code",
|
269 |
+
"execution_count": null,
|
270 |
+
"metadata": {},
|
271 |
+
"outputs": [],
|
272 |
+
"source": [
|
273 |
+
"predictions = model.predict(tsla_df_b)"
|
274 |
+
]
|
275 |
+
},
|
276 |
+
{
|
277 |
+
"cell_type": "code",
|
278 |
+
"execution_count": null,
|
279 |
+
"metadata": {},
|
280 |
+
"outputs": [],
|
281 |
+
"source": [
|
282 |
+
"predictions "
|
283 |
+
]
|
284 |
+
},
|
285 |
+
{
|
286 |
+
"cell_type": "code",
|
287 |
+
"execution_count": null,
|
288 |
+
"metadata": {},
|
289 |
+
"outputs": [],
|
290 |
+
"source": []
|
291 |
+
}
|
292 |
+
],
|
293 |
+
"metadata": {
|
294 |
+
"kernelspec": {
|
295 |
+
"display_name": "base",
|
296 |
+
"language": "python",
|
297 |
+
"name": "python3"
|
298 |
+
},
|
299 |
+
"language_info": {
|
300 |
+
"codemirror_mode": {
|
301 |
+
"name": "ipython",
|
302 |
+
"version": 3
|
303 |
+
},
|
304 |
+
"file_extension": ".py",
|
305 |
+
"mimetype": "text/x-python",
|
306 |
+
"name": "python",
|
307 |
+
"nbconvert_exporter": "python",
|
308 |
+
"pygments_lexer": "ipython3",
|
309 |
+
"version": "3.11.4"
|
310 |
+
},
|
311 |
+
"orig_nbformat": 4
|
312 |
+
},
|
313 |
+
"nbformat": 4,
|
314 |
+
"nbformat_minor": 2
|
315 |
+
}
|
Stocks news prediction/SML/__pycache__/feature_pipeline.cpython-311.pyc
ADDED
Binary file (2.74 kB)
|
|
Stocks news prediction/SML/__pycache__/news_preprocessing.cpython-311.pyc
ADDED
Binary file (2.54 kB)
|
|
feature_pipeline.py → Stocks news prediction/SML/feature_pipeline.py
RENAMED
@@ -1,10 +1,3 @@
-# %%
-from dotenv import load_dotenv
-import os
-
-# %%
-#!pip install great_expectations==0.18.12
-
 # %%
 # Import necessary libraries
 import pandas as pd # For data manipulation using DataFrames
@@ -13,119 +6,85 @@ import matplotlib.pyplot as plt # For data visualization
 import os # For operating system-related tasks
 import joblib # For saving and loading models
 import hopsworks # For getting access to hopsworks
-
-
+import re
 
 # Import specific modules from scikit-learn
 from sklearn.preprocessing import StandardScaler, OneHotEncoder # For data preprocessing
 from sklearn.metrics import accuracy_score # For evaluating model accuracy
 
-
-
-
-
-#load_dotenv()
-
-#api_key = os.environ.get('stocks_api') # Replace this with your actual API key
-#ts = TimeSeries(key=api_key, output_format='pandas')
-
-# Fetch daily adjusted stock prices; adjust the symbol as needed
-#data, meta_data = ts.get_daily(symbol='TSLA', outputsize='full')
-
-#print(data.head())
-
-# %%
-#data.info()
-
-# %%
-#meta_data
-
-# %%
-# Define your file path and name
-#file_path = 'TSLA_stock_price.csv' # Customize the path and filename
-
-# Save the DataFrame to CSV
-#stock_data.to_csv(file_path)
-
-#print(f"Data saved to {file_path}")
-
-
-# %%
-# Load and display the data from CSV to confirm
-tsla_df = pd.read_csv('TSLA_stock_price.csv')
-print(tsla_df.head())
-
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
 
-#
+#Connecting to hopsworks
 api_key = os.environ.get('hopsworks_api')
 project = hopsworks.login(api_key_value=api_key)
 fs = project.get_feature_store()
 
 # %%
-
+# Load and display the data from CSV to confirm
+tsla_df = pd.read_csv('TSLA_stock_price.csv')
+print(tsla_df.head())
 
 # %%
+#Defining a function to clean the column names
 def clean_column_name(name):
     # Remove all non-letter characters
     cleaned_name = re.sub(r'[^a-zA-Z]', '', name)
     return cleaned_name
 
-
 # %%
 tsla_df
 
 # %%
-#
+# Cleaning up column names for 'tsla_df'
 tsla_df.columns = [clean_column_name(col) for col in tsla_df.columns]
-
-
-# %%
 print(tsla_df.columns)
 
-
 # %%
-
-
-# Assuming tsla_df is your pandas DataFrame
-# Convert the "date" column to timestamp
+# Converting the "date" column to timestamp
 tsla_df['date'] = pd.to_datetime(tsla_df['date'])
 
-
 # %%
-#
+# Defining the stocks feature group
 tesla_fg = fs.get_or_create_feature_group(
     name="tesla_stock",
     description="Tesla stock dataset from alpha vantage",
-    version=…,
+    version=1,
     primary_key=["ticker"],
     event_time=['date'],
    online_enabled=False,
 )
 
 # %%
+#Inserting the stock data into the stocks feature group
 tesla_fg.insert(tsla_df, write_options={"wait_for_job" : False})
 
 # %%
+#Collecting news df
 news_df = pd.read_csv('news_articles_ema.csv')
 
-
 # %%
+#Dropping exp mean 7 days
 news_df_updated = news_df.drop(columns=['exp_mean_7_days'])
 
 # %%
+#Updating date to datetime
 news_df_updated['date'] = pd.to_datetime(news_df_updated['date'])
 
 # %%
+#Defining the news feature group
 news_sentiment_fg = fs.get_or_create_feature_group(
     name='news_sentiment_updated',
     description='News sentiment from Polygon',
-    version=…,
+    version=1,
     primary_key=['ticker'],
     event_time=['date'],
     online_enabled=False,
 )
 
 # %%
+#Inserting the news data into the news feature group
 news_sentiment_fg.insert(news_df_updated)
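A quick way to sanity-check the two inserts above is to read the feature groups back from Hopsworks. A minimal sketch, assuming the same `fs` login as in the file (`read()` is the hsfs call that materializes a feature group into a pandas DataFrame):

# Minimal sketch: read the freshly inserted feature groups back for inspection.
tesla_check = fs.get_feature_group("tesla_stock", version=1).read()
news_check = fs.get_feature_group("news_sentiment_updated", version=1).read()
print(tesla_check.shape, news_check.shape)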
feature_view.py → Stocks news prediction/SML/feature_view.py
RENAMED
@@ -1,5 +1,5 @@
 # %%
-#
+# Importing necessary libraries
 import pandas as pd # For data manipulation using DataFrames
 import numpy as np # For numerical operations
 import matplotlib.pyplot as plt # For data visualization
@@ -7,39 +7,34 @@ import os # For operating system-related tasks
 import joblib # For saving and loading models
 import hopsworks # For getting access to hopsworks
 
+from feature_pipeline import tesla_fg #Loading in the tesla_fg
+from feature_pipeline import news_sentiment_fg #Loading in the news_fg
 
-
-# Import specific modules from scikit-learn
-from sklearn.preprocessing import StandardScaler, OneHotEncoder # For data preprocessing
-from sklearn.metrics import accuracy_score # For evaluating model accuracy
-
-# %%
-from feature_pipeline import tesla_fg
-from feature_pipeline import news_sentiment_fg
-
-# %%
+#Making the notebook able to fetch from the .env file
 from dotenv import load_dotenv
 import os
 
 load_dotenv()
 
-#
+#Getting connected to hopsworks
 api_key = os.environ.get('hopsworks_api')
 project = hopsworks.login(api_key_value=api_key)
 fs = project.get_feature_store()
 
 # %%
+#Defining the function to create feature view
+
 def create_stocks_feature_view(fs, version):
 
     # Loading in the feature groups
     tesla_fg = fs.get_feature_group('tesla_stock', version=1)
     news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=1)
 
-    #
+    # Defining the query
     ds_query = tesla_fg.select(['date', 'open', 'ticker'])\
         .join(news_sentiment_fg.select(['sentiment']))
 
-    #
+    # Creating the feature view
     feature_view = fs.create_feature_view(
         name='tesla_stocks_fv',
         query=ds_query,
@@ -49,6 +44,7 @@ def create_stocks_feature_view(fs, version):
     return feature_view, tesla_fg
 
 # %%
+#Creating the feature view
 try:
     feature_view = fs.get_feature_view("tesla_stocks_fv", version=1)
     tesla_fg = fs.get_feature_group('tesla_stock', version=1)
@@ -56,6 +52,7 @@ except:
     feature_view, tesla_fg = create_stocks_feature_view(fs, 1)
 
 # %%
+#Defining a function to get fixed data from the feature view
 def fix_data_from_feature_view(df,start_date,end_date):
     df = df.sort_values("date")
     df = df.reset_index()
@@ -77,27 +74,4 @@ def fix_data_from_feature_view(df,start_date,end_date):
 
     return filtered_df
 
-# %%
-#def create_stocks_feature_view(fs, version):
-
-    #Loading in the feature groups
-    # tesla_fg = fs.get_feature_group('tesla_stock', version = 3)
-    # news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version = 2)
-
-    # ds_query = tesla_fg.select(['date','open', 'ticker'])\
-    #     .join(news_sentiment_fg.select_except(['ticker','time', 'amp_url', 'image_url']))
-
-    # return (fs.create_tesla_feature_view(
-    #     name = 'tsla_stocks_fv',
-    #     query = ds_query,
-    #     labels=['ticker']
-    # ), tesla_fg)
-
-# %%
-#try:
-    # feature_view = fs.get_feature_view("tsla_stocks_fv", version=1)
-    # tesla_fg = fs.get_feature_group('tesla_stock', version=3)
-#except:
-    # feature_view, tesla_fg = create_stocks_feature_view(fs, 1)
-
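Once `tesla_stocks_fv` exists, joined rows can be pulled straight from it. A minimal sketch, assuming the `feature_view` object created above (`get_batch_data()` is the hsfs call that returns the feature view's query result as a pandas DataFrame):

# Minimal sketch: inspect the joined date/open/ticker/sentiment rows.
batch_df = feature_view.get_batch_data()
print(batch_df.head())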
Stocks news prediction/SML/historical_news.py
ADDED
@@ -0,0 +1,120 @@
+# %%
+#Importing necessary libraries
+from dotenv import load_dotenv
+from datetime import datetime, timedelta
+import requests
+import os
+import time
+import pandas as pd
+from news_preprocessing import * #Importing everything from 'news_preprocessing'
+load_dotenv()
+
+# %%
+#Defining a function for fetching news
+
+def fetch_news(api_key, ticker, start_date, end_date):
+    base_url = os.environ.get("endpointnewsp")
+    headers = {"Authorization": f"Bearer {api_key}"}
+    all_news = []
+
+    current_date = start_date
+
+    while current_date <= end_date:
+        batch_end_date = current_date + timedelta(days=50)
+        if batch_end_date > end_date:
+            batch_end_date = end_date
+
+        params = {
+            "ticker": ticker,
+            "published_utc.gte": current_date.strftime('%Y-%m-%d'),
+            "published_utc.lte": batch_end_date.strftime('%Y-%m-%d'),
+            "limit": 50,
+            "sort": "published_utc"
+        }
+
+        try:
+            response = requests.get(base_url, headers=headers, params=params)
+            if response.status_code == 200:
+                data = response.json()
+                articles = data.get('results', [])
+
+                # Creating a DataFrame from articles
+                df = pd.DataFrame(articles)
+
+                # Adding primary_key column if ticker is found
+                df['ticker'] = df['tickers'].apply(lambda x: ticker if ticker in x else None)
+
+                all_news.append(df) # Append DataFrame to the list
+                print(f"Fetched {len(articles)} articles from {current_date.strftime('%Y-%m-%d')} to {batch_end_date.strftime('%Y-%m-%d')}")
+                current_date = batch_end_date + timedelta(days=1)
+            elif response.status_code == 429:
+                print("Rate limit reached. Waiting to retry...")
+                time.sleep(60) # Wait for 60 seconds or as recommended by the API
+                continue # Retry the current request
+            else:
+                print(f"Failed to fetch data: {response.status_code}, {response.text}")
+                break
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            break
+
+    return pd.concat(all_news, ignore_index=True)
+
+#Usage
+api_key = os.environ.get('newsp_api')
+ticker = 'TSLA'
+end_date = datetime.now() - timedelta(days=1) # Yesterday's date
+start_date = end_date - timedelta(days=365 * 2)
+news_articles = fetch_news(api_key, ticker, start_date, end_date)
+print(f"Total articles fetched: {len(news_articles)}")
+
+# %%
+# Process the news articles
+df = process_news_articles(news_articles)
+
+# %%
+df.info()
+
+# %%
+df.head()
+
+# %%
+df = df.sort_index(ascending=False)
+
+# %%
+#Putting the news articles into a csv
+df.to_csv('news_articles.csv', index=False)
+
+# %%
+df_processed = exponential_moving_average(df, window=7)
+
+# %%
+df_processed.to_csv('news_articles_ema.csv', index=False)
+
+# %%
+df_processed.head()
+
+# %%
+df_processed.tail()
+
+# %%
+print(df_processed['date'].min())
+print(df_processed['date'].max())
+
+# %%
+print(df_processed['date'].max() - df_processed['date'].min())
+
+# %%
+df_processed.shape
+
+# %%
+duplicates = df_processed[df_processed.duplicated('date')]
+
+# %%
+duplicates.shape
+
+# %%
+df_processed.head()
+
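The 429 branch above retries after a fixed 60-second sleep, which can loop indefinitely against a strict free-tier quota. A variant with a capped exponential backoff, sketched under the same `requests`/`time` usage as the file (the `retries` counter and `max_retries` cap are illustrative names, not part of the commit):

retries = 0
max_retries = 5  # illustrative cap, not from the original file
while retries < max_retries:
    response = requests.get(base_url, headers=headers, params=params)
    if response.status_code != 429:
        break
    wait = 60 * (2 ** retries)  # back off: 60s, 120s, 240s, ...
    print(f"Rate limit reached. Waiting {wait}s to retry...")
    time.sleep(wait)
    retries += 1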
Stocks news prediction/SML/historical_stock.py
ADDED
@@ -0,0 +1,51 @@
+# %%
+#Importing necessary libraries
+from dotenv import load_dotenv
+import os
+from alpha_vantage.timeseries import TimeSeries
+import pandas as pd
+import hopsworks
+import re
+import modal
+#preprocessing
+import requests
+import pandas as pd
+import json
+#import pandas_market_calendars as mcal
+import datetime
+import numpy as np
+from datetime import timedelta
+load_dotenv() #Making the .env file work
+
+# %%
+#Setting up the API key to be able to fetch stocks from Alpha Vantage
+
+api_key = os.environ.get('stocks_api')
+ts = TimeSeries(key=api_key, output_format='pandas')
+
+#Defining a function to fetch stocks
+
+def fetch_stock_prices(symbol):
+    # Fetch daily adjusted stock prices; adjust the symbol as needed
+    data, meta_data = ts.get_daily(symbol=symbol, outputsize='full')
+
+    # Add a new column named 'ticker' and fill it with the ticker name
+    data['ticker'] = symbol
+
+    return data
+
+#Usage
+symbol = 'TSLA'
+stock_data = fetch_stock_prices(symbol)
+print(stock_data.head())
+
+# %%
+# Defining the file path and name
+file_path = 'TSLA_stock_price.csv'
+
+# Saving the DataFrame to CSV
+stock_data.to_csv(file_path)
+
+print(f"Data saved to {file_path}")
+
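One detail worth knowing about the Alpha Vantage pandas output: `get_daily` names its columns '1. open', '2. high', '3. low', '4. close', '5. volume', which is why the preprocessing below strips all non-letter characters from column names. A minimal illustration of that cleanup:

import re

def clean_column_name(name):
    # Remove all non-letter characters, e.g. '1. open' -> 'open'
    return re.sub(r'[^a-zA-Z]', '', name)

print(clean_column_name('1. open'))    # open
print(clean_column_name('5. volume'))  # volume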
news_preprocessing.py → Stocks news prediction/SML/news_preprocessing.py
RENAMED
@@ -1,4 +1,5 @@
 # %%
+#Importing necessary libraries
 from dotenv import load_dotenv
 from datetime import datetime, timedelta
 import requests
@@ -8,6 +9,7 @@ import pandas as pd
 from textblob import TextBlob
 
 # %%
+#Defining a function to process news articles
 def process_news_articles(news_articles):
     # Convert list of dictionaries to DataFrame
     df = pd.DataFrame(news_articles)
@@ -27,19 +29,21 @@ def process_news_articles(news_articles):
     df['date'] = df['published_utc'].dt.date
     df['time'] = df['published_utc'].dt.time
 
-    #
+    # Dropping unnecessary columns
     df.drop(['published_utc'], axis=1, inplace=True)
     # set date to index
     df = df.set_index("date")
     df.reset_index(inplace=True)
     df.index = pd.to_datetime(df.index)
-    df = df.groupby(['date', 'ticker'])['sentiment'].mean().reset_index(
+    df = df.groupby(['date', 'ticker'])['sentiment'].mean().reset_index()
 
     return df
 
 # %%
+#Defining a function for the exponential moving average
+
 def exponential_moving_average(df, window):
-
+    # Calculate EMA on the 'sentiment' column
     df[f'exp_mean_{window}_days'] = df['sentiment'].ewm(span=window, adjust=False).mean()
     return df
 
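The 'sentiment' column that `process_news_articles` aggregates is computed in a part of the file this diff leaves collapsed; given the TextBlob import, a plausible sketch of that step and of the EMA call it feeds (the example headlines are made up):

import pandas as pd
from textblob import TextBlob

titles = pd.Series(["Tesla beats delivery estimates", "Tesla recalls thousands of vehicles"])
sentiment = titles.apply(lambda t: TextBlob(t).sentiment.polarity)  # polarity in [-1, 1]
print(sentiment.ewm(span=7, adjust=False).mean())  # same call as exponential_moving_average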
stock_preprocessing.py → Stocks news prediction/SML/stock_preprocessing.py
RENAMED
@@ -1,4 +1,5 @@
 # %%
+#Importing necessary libraries
 from dotenv import load_dotenv
 import os
 from alpha_vantage.timeseries import TimeSeries
@@ -10,15 +11,14 @@ import modal
 import requests
 import pandas as pd
 import json
-
+import pandas_market_calendars as mcal
 import datetime
 import numpy as np
 from datetime import datetime, timedelta
-
-
-# %%
 load_dotenv()
 
+# %%
+#Connecting to Alpha vantage using API key
 api_key = os.environ.get('stocks_api') # Replace this with your actual API key
 ts = TimeSeries(key=api_key, output_format='pandas')
 
@@ -28,12 +28,11 @@ data, meta_data = ts.get_daily(symbol='TSLA', outputsize='full')
 print(data.head())
 
 # %%
-data
-
-# %%
+#Looking at data info
 data.info()
 
 # %%
+#Looking at the meta data
 meta_data
 
 # %%
@@ -50,6 +49,7 @@ def today_is_a_business_day(today):
     return False
 
 # %%
+#Defining a function to find the next business day
 def next_business_day(today):
 
     # Real tomorrow
@@ -71,6 +71,7 @@ def next_business_day(today):
     return isBusinessDay.to_numpy()[0]
 
 # %%
+#Defining a function to extract business day
 def extract_business_day(start_date,end_date):
     """
     Given a start_date and end_date.
@@ -82,27 +83,27 @@ def extract_business_day(start_date,end_date):
     e.g is_open = [1,0,...,1] means that start_date = open, day after start_date = closed, and end_date = open
     """
 
-    #
+    # Saving for later
     end_date_save = end_date
 
-    #
+    # Getting the NYSE calendar
     cal = mcal.get_calendar('NYSE')
 
-    #
+    # Getting the NYSE calendar's open and close times for the specified period
     schedule = cal.schedule(start_date=start_date, end_date=end_date)
 
     # Only need a list of dates when it's open (not open and close times)
     isBusinessDay = np.array(schedule.market_open.dt.strftime('%Y-%m-%d'))
 
-    #
+    # Going over all days:
     delta = datetime.timedelta(days=1)
     start_date = datetime.datetime.strptime(start_date,"%Y-%m-%d") #datetime.date(2015, 7, 16)
     end_date = datetime.datetime.strptime(end_date,"%Y-%m-%d") #datetime.date(2023, 1, 4)
 
-    #
+    # Extracting days from the timedelta object
     num_days = (end_date - start_date).days + 1
 
-    #
+    # Creating a boolean array for days being open (1) and closed (0)
     is_open = np.zeros(num_days)
 
     # iterate over range of dates
@@ -131,6 +132,7 @@ def extract_business_day(start_date,end_date):
     return isBusinessDay, is_open
 
 # %%
+#Defining a function to clean the column names
 def clean_column_name(name):
     # Remove all non-letter characters
     cleaned_name = re.sub(r'[^a-zA-Z]', '', name)
@@ -150,15 +152,12 @@ data.reset_index(inplace=True)
 data.head()
 
 # %%
-
-
-# %%
-# Define the date range you're interested in
+# Define the date range we're interested in
 yesterday = datetime.now() - timedelta(days=1)
 two_years_back = yesterday - timedelta(days=684)
 
 # %%
-#
+# Filtering the DataFrame to this range
 filtered_df = data[(data['date'] >= two_years_back) & (data['date'] <= yesterday)]
 
 # %%
@@ -171,7 +170,4 @@ print(filtered_df['date'].max())
 # %%
 filtered_df.shape
 
-# %%
-
-
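`extract_business_day` leans on pandas_market_calendars for the NYSE schedule; a minimal standalone sketch of the calls it uses:

import pandas_market_calendars as mcal

nyse = mcal.get_calendar('NYSE')
schedule = nyse.schedule(start_date='2024-01-02', end_date='2024-01-10')
# One row per trading day, with market_open/market_close timestamps
print(schedule.market_open.dt.strftime('%Y-%m-%d').tolist())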
Stocks news prediction/SML/training_pipeline.py
ADDED
@@ -0,0 +1,256 @@
+# %%
+#Importing necessary libraries
+import hopsworks
+import hsfs
+from dotenv import load_dotenv
+import os
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.metrics import mean_squared_error
+from hsml.schema import Schema
+from hsml.model_schema import ModelSchema
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
+from sklearn.preprocessing import StandardScaler # Import StandardScaler from scikit-learn
+import joblib
+
+load_dotenv()
+
+#Connecting to hopsworks
+api_key = os.environ.get('hopsworks_api')
+project = hopsworks.login(api_key_value=api_key)
+fs = project.get_feature_store()
+
+#Another connection to hopsworks
+api_key = os.getenv('hopsworks_api')
+connection = hsfs.connection()
+fs = connection.get_feature_store()
+
+# %%
+#Getting the feature view
+feature_view = fs.get_feature_view(
+    name='tesla_stocks_fv',
+    version=1
+)
+
+# %%
+#Setting up train & test split dates
+train_start = "2022-06-22"
+train_end = "2023-12-31"
+
+test_start = '2024-01-01'
+test_end = "2024-05-03"
+
+# %%
+#Creating the train/test split on the feature view with the split dates
+feature_view.create_train_test_split(
+    train_start=train_start,
+    train_end=train_end,
+    test_start=test_start,
+    test_end=test_end,
+    data_format='csv',
+    coalesce=True,
+    statistics_config={'histogram': True, 'correlations': True})
+
+# %%
+#Collecting the split from feature view
+X_train, X_test, y_train, y_test = feature_view.get_train_test_split(6)
+
+# %%
+#Inspecting X_train
+X_train
+
+# %%
+#Converting date into datetime
+X_train['date'] = pd.to_datetime(X_train['date']).dt.date
+X_test['date'] = pd.to_datetime(X_test['date']).dt.date
+X_train['date'] = pd.to_datetime(X_train['date'])
+X_test['date'] = pd.to_datetime(X_test['date'])
+
+# %%
+X_train.head()
+
+# %%
+# Extracting the 'ticker' column
+tickers = X_train[['ticker']]
+
+# Initializing OneHotEncoder
+encoder = OneHotEncoder()
+
+# Fitting and transforming the 'ticker' column
+ticker_encoded = encoder.fit_transform(tickers)
+
+# Converting the encoded column into a DataFrame
+ticker_encoded_df = pd.DataFrame(ticker_encoded.toarray(), columns=encoder.get_feature_names_out(['ticker']))
+
+# Concatenating the encoded DataFrame with the original DataFrame
+X_train = pd.concat([X_train, ticker_encoded_df], axis=1)
+
+# Dropping the original 'ticker' column
+X_train.drop('ticker', axis=1, inplace=True)
+
+# %%
+#Inspecting X_train after one-hot encoding 'ticker'
+X_train.head()
+
+# %%
+#Doing the same for X test as done to X train
+tickers = X_test[['ticker']]
+
+# Initializing OneHotEncoder
+encoder = OneHotEncoder()
+
+# Fitting and transforming the 'ticker' column
+ticker_encoded_test = encoder.fit_transform(tickers)
+
+# Converting the encoded column into a DataFrame
+ticker_encoded_df_test = pd.DataFrame(ticker_encoded_test.toarray(), columns=encoder.get_feature_names_out(['ticker']))
+
+# Concatenating the encoded DataFrame with the original DataFrame
+X_test = pd.concat([X_test, ticker_encoded_df_test], axis=1)
+
+# Dropping the original 'ticker' column
+X_test.drop('ticker', axis=1, inplace=True)
+
+# %%
+#Loading in MinMaxScaler to be used on the target variable 'open'
+scaler = MinMaxScaler()
+
+# Fitting and transforming the 'open' column
+y_train['open_scaled'] = scaler.fit_transform(y_train[['open']])
+y_train.drop('open', axis=1, inplace=True)
+
+# %%
+#Doing the same to y_test as done to y_train
+y_test['open_scaled'] = scaler.fit_transform(y_test[['open']])
+y_test.drop('open', axis=1, inplace=True)
+
+# %%
+#Defining the function for the LSTM model
+def create_model(input_shape,
+                 LSTM_filters=64,
+                 dropout=0.1,
+                 recurrent_dropout=0.1,
+                 dense_dropout=0.5,
+                 activation='relu',
+                 depth=1):
+
+    model = Sequential()
+
+    # Input layer
+    model.add(Input(shape=input_shape))
+
+    if depth > 1:
+        for i in range(1, depth):
+            # Recurrent layer
+            model.add(LSTM(LSTM_filters, return_sequences=True, dropout=dropout, recurrent_dropout=recurrent_dropout))
+
+    # Recurrent layer
+    model.add(LSTM(LSTM_filters, return_sequences=False, dropout=dropout, recurrent_dropout=recurrent_dropout))
+
+    # Fully connected layer
+    if activation == 'relu':
+        model.add(Dense(LSTM_filters, activation='relu'))
+    elif activation == 'leaky_relu':
+        model.add(Dense(LSTM_filters))
+        model.add(tf.keras.layers.LeakyReLU(alpha=0.1))
+
+    # Dropout for regularization
+    model.add(Dropout(dense_dropout))
+
+    # Output layer for predicting one day forward
+    model.add(Dense(1, activation='linear'))
+
+    # Compile the model
+    model.compile(optimizer='adam', loss='mse')
+
+    return model
+
+# %%
+# As X_train['date'] column exists and is in datetime format, we're converting it
+X_train['year'] = X_train['date'].dt.year
+X_train['month'] = X_train['date'].dt.month
+X_train['day'] = X_train['date'].dt.day
+
+# Dropping the original date column
+X_train.drop(columns=['date'], inplace=True)
+
+# Converting dataframe to numpy array
+X_train_array = X_train.to_numpy()
+
+# Reshaping the array to have a shape suitable for LSTM
+X_train_array = np.expand_dims(X_train_array, axis=1)
+
+# %%
+# Convert DataFrame to numpy array
+X_train_array = X_train.values
+
+# Reshaping X_train_array to add a time step dimension
+X_train_reshaped = X_train_array.reshape(X_train_array.shape[0], 1, X_train_array.shape[1])
+
+# Assuming X_train_reshaped shape is now (374, 1, 5)
+input_shape = X_train_reshaped.shape[1:]
+
+# Create the model
+model = create_model(input_shape=input_shape)
+
+# %%
+#Fitting the model on the training dataset
+model.fit(X_train_reshaped, y_train)
+
+# %%
+# As X_test['date'] column exists and is in datetime format, we're converting it
+X_test['year'] = X_test['date'].dt.year
+X_test['month'] = X_test['date'].dt.month
+X_test['day'] = X_test['date'].dt.day
+
+# Dropping the original date column
+X_test.drop(columns=['date'], inplace=True)
+
+# Converting dataframe to numpy array
+X_test_array = X_test.to_numpy()
+
+# Reshape the array to have a shape suitable for LSTM
+X_test_array = np.expand_dims(X_test_array, axis=1)
+
+# %%
+#Predicting y_pred with X_test
+y_pred = model.predict(X_test_array)
+
+# %%
+#Connecting to the hopsworks model registry
+mr = project.get_model_registry()
+
+# %%
+# Compute RMSE metric for the model
+rmse = np.sqrt(mean_squared_error(y_test, y_pred))
+rmse_metrics = {"RMSE": rmse}
+rmse_metrics
+
+# %%
+#Setting up the model schema
+input_schema = Schema(X_train)
+output_schema = Schema(y_train)
+model_schema = ModelSchema(input_schema, output_schema)
+
+# %%
+#Creating a folder called 'stock_model'
+model_dir = "stock_model"
+if os.path.isdir(model_dir) == False:
+    os.mkdir(model_dir)
+
+# %%
+#Saving the model to the hopsworks model registry
+stock_pred_model = mr.tensorflow.create_model(
+    name="stock_pred_model",
+    metrics=rmse_metrics,
+    model_schema=model_schema,
+    description="Stock Market TSLA Predictor from News Sentiment",
+)
+
+stock_pred_model.save(model_dir)
+
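Two follow-ups the inference side will need, sketched under stated assumptions: the Keras network has to be written into `stock_model/` before `stock_pred_model.save(model_dir)` uploads the directory (the `lstm_model.keras` filename is illustrative, not from the commit), and predictions come out in the scaled space, so the fitted MinMaxScaler is needed to map them back to prices:

import os
import tensorflow as tf

# Persist the trained network into the directory that gets registered
# (illustrative filename; the registry uploads whatever the folder holds).
model.save(os.path.join(model_dir, "lstm_model.keras"))

# Later, fetch it back from the model registry for inference (hsml API).
retrieved = mr.get_model("stock_pred_model", version=1)
download_dir = retrieved.download()
reloaded = tf.keras.models.load_model(os.path.join(download_dir, "lstm_model.keras"))

# Map scaled predictions back to dollar prices with the fitted scaler.
y_pred_prices = scaler.inverse_transform(y_pred)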
TSLA_stock_price.csv → Stocks news prediction/TSLA_stock_price.csv
RENAMED
File without changes

news_articles.csv → Stocks news prediction/news_articles.csv
RENAMED
File without changes

news_articles_ema.csv → Stocks news prediction/news_articles_ema.csv
RENAMED
File without changes
feature_engineering.ipynb
DELETED
@@ -1,73 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import requests\n",
-    "import pandas as pd\n",
-    "import json\n",
-    "import datetime\n",
-    "import numpy as np\n",
-    "from datetime import timedelta "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def getNews(api_key,endpoint,ticker,from_date,to_date,num=1000):\n",
-    "    # Set the parameters for the request\n",
-    "    params = {\n",
-    "        \"api_token\": api_key,\n",
-    "        \"s\": ticker,\n",
-    "        \"from\": from_date, \n",
-    "        \"to\": to_date,\n",
-    "        \"limit\": num,\n",
-    "    }\n",
-    "    \n",
-    "    # Make the request to the API\n",
-    "    response = requests.get(endpoint, params=params)\n",
-    "    \n",
-    "    # Print the response from the API\n",
-    "    #print(response.json())\n",
-    "\n",
-    "    #Return a Pandas dataframe from the response\n",
-    "    return pd.DataFrame(response.json())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "base",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.9"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
feature_pipeline.ipynb
DELETED
@@ -1,775 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from dotenv import load_dotenv\n",
-    "import os "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Requirement already satisfied: great_expectations==0.18.12 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (0.18.12)\n",
-      [… pip "Requirement already satisfied" log for great_expectations==0.18.12 and its dependency tree (altair, Click, cryptography, Ipython, jsonschema, notebook, pydantic, requests, and the rest), elided …]
|
126 |
-
"Requirement already satisfied: nbclient>=0.5.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (0.10.0)\n",
|
127 |
-
"Requirement already satisfied: pandocfilters>=1.4.1 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (1.5.1)\n",
|
128 |
-
"Requirement already satisfied: tinycss2 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (1.3.0)\n",
|
129 |
-
"Requirement already satisfied: webencodings in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (0.5.1)\n",
|
130 |
-
"Requirement already satisfied: fqdn in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (1.5.1)\n",
|
131 |
-
"Requirement already satisfied: isoduration in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (20.11.0)\n",
|
132 |
-
"Requirement already satisfied: uri-template in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (1.3.0)\n",
|
133 |
-
"Requirement already satisfied: webcolors>=1.11 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (1.13)\n",
|
134 |
-
"Requirement already satisfied: soupsieve>1.2 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from beautifulsoup4->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (2.5)\n",
|
135 |
-
"Requirement already satisfied: arrow>=0.15.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (1.3.0)\n",
|
136 |
-
"Requirement already satisfied: types-python-dateutil>=2.8.10 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook>=6.4.10->great_expectations==0.18.12) (2.9.0.20240316)\n"
|
137 |
-
]
|
138 |
-
}
|
139 |
-
],
|
140 |
-
"source": [
|
141 |
-
"!pip install great_expectations==0.18.12"
|
142 |
-
]
|
143 |
-
},
|
144 |
-
{
|
145 |
-
"cell_type": "code",
|
146 |
-
"execution_count": 3,
|
147 |
-
"metadata": {},
|
148 |
-
"outputs": [],
|
149 |
-
"source": [
|
150 |
-
"# Import necessary libraries\n",
|
151 |
-
"import pandas as pd # For data manipulation using DataFrames\n",
|
152 |
-
"import numpy as np # For numerical operations\n",
|
153 |
-
"import matplotlib.pyplot as plt # For data visualization\n",
|
154 |
-
"import os # For operating system-related tasks\n",
|
155 |
-
"import joblib # For saving and loading models\n",
|
156 |
-
"import hopsworks # For getting access to hopsworks\n",
|
157 |
-
"\n",
|
158 |
-
"\n",
|
159 |
-
"\n",
|
160 |
-
"# Import specific modules from scikit-learn\n",
|
161 |
-
"from sklearn.preprocessing import StandardScaler, OneHotEncoder # For data preprocessing\n",
|
162 |
-
"from sklearn.metrics import accuracy_score # For evaluating model accuracy"
|
163 |
-
]
|
164 |
-
},
|
165 |
-
{
|
166 |
-
"cell_type": "code",
|
167 |
-
"execution_count": 4,
|
168 |
-
"metadata": {},
|
169 |
-
"outputs": [
|
170 |
-
{
|
171 |
-
"name": "stdout",
|
172 |
-
"output_type": "stream",
|
173 |
-
"text": [
|
174 |
-
"Requirement already satisfied: modal in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (0.62.141)\n",
|
175 |
-
"Requirement already satisfied: aiohttp in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (3.9.5)\n",
|
176 |
-
"Requirement already satisfied: aiostream~=0.5.2 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (0.5.2)\n",
|
177 |
-
"Requirement already satisfied: certifi in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (2024.2.2)\n",
|
178 |
-
"Requirement already satisfied: click>=8.1.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (8.1.7)\n",
|
179 |
-
"Requirement already satisfied: fastapi in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (0.111.0)\n",
|
180 |
-
"Requirement already satisfied: grpclib==0.4.7 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (0.4.7)\n",
|
181 |
-
"Requirement already satisfied: protobuf!=4.24.0,<5.0,>=3.19 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (4.25.3)\n",
|
182 |
-
"Requirement already satisfied: rich>=12.0.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (13.7.1)\n",
|
183 |
-
"Requirement already satisfied: synchronicity~=0.6.6 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (0.6.7)\n",
|
184 |
-
"Requirement already satisfied: toml in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (0.10.2)\n",
|
185 |
-
"Requirement already satisfied: typer>=0.9 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (0.12.3)\n",
|
186 |
-
"Requirement already satisfied: types-certifi in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (2021.10.8.3)\n",
|
187 |
-
"Requirement already satisfied: types-toml in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (0.10.8.20240310)\n",
|
188 |
-
"Requirement already satisfied: watchfiles in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (0.21.0)\n",
|
189 |
-
"Requirement already satisfied: typing-extensions~=4.6 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from modal) (4.11.0)\n",
|
190 |
-
"Requirement already satisfied: h2<5,>=3.1.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from grpclib==0.4.7->modal) (4.1.0)\n",
|
191 |
-
"Requirement already satisfied: multidict in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from grpclib==0.4.7->modal) (6.0.5)\n",
|
192 |
-
"Requirement already satisfied: colorama in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from click>=8.1.0->modal) (0.4.6)\n",
|
193 |
-
"Requirement already satisfied: markdown-it-py>=2.2.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from rich>=12.0.0->modal) (3.0.0)\n",
|
194 |
-
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from rich>=12.0.0->modal) (2.17.2)\n",
|
195 |
-
"Requirement already satisfied: sigtools==4.0.1 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from synchronicity~=0.6.6->modal) (4.0.1)\n",
|
196 |
-
"Requirement already satisfied: attrs in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from sigtools==4.0.1->synchronicity~=0.6.6->modal) (23.2.0)\n",
|
197 |
-
"Requirement already satisfied: shellingham>=1.3.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from typer>=0.9->modal) (1.5.4)\n",
|
198 |
-
"Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from aiohttp->modal) (1.3.1)\n",
|
199 |
-
"Requirement already satisfied: frozenlist>=1.1.1 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from aiohttp->modal) (1.4.1)\n",
|
200 |
-
"Requirement already satisfied: yarl<2.0,>=1.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from aiohttp->modal) (1.9.4)\n",
|
201 |
-
"Requirement already satisfied: starlette<0.38.0,>=0.37.2 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from fastapi->modal) (0.37.2)\n",
|
202 |
-
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from fastapi->modal) (2.7.1)\n",
|
203 |
-
"Requirement already satisfied: fastapi-cli>=0.0.2 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from fastapi->modal) (0.0.2)\n",
|
204 |
-
"Requirement already satisfied: httpx>=0.23.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from fastapi->modal) (0.27.0)\n",
|
205 |
-
"Requirement already satisfied: jinja2>=2.11.2 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from fastapi->modal) (3.1.3)\n",
|
206 |
-
"Requirement already satisfied: python-multipart>=0.0.7 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from fastapi->modal) (0.0.9)\n",
|
207 |
-
"Requirement already satisfied: ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from fastapi->modal) (5.9.0)\n",
|
208 |
-
"Requirement already satisfied: orjson>=3.2.1 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from fastapi->modal) (3.10.3)\n",
|
209 |
-
"Requirement already satisfied: email_validator>=2.0.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from fastapi->modal) (2.1.1)\n",
|
210 |
-
"Requirement already satisfied: uvicorn>=0.12.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from uvicorn[standard]>=0.12.0->fastapi->modal) (0.29.0)\n",
|
211 |
-
"Requirement already satisfied: anyio>=3.0.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from watchfiles->modal) (4.3.0)\n",
|
212 |
-
"Requirement already satisfied: idna>=2.8 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from anyio>=3.0.0->watchfiles->modal) (3.7)\n",
|
213 |
-
"Requirement already satisfied: sniffio>=1.1 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from anyio>=3.0.0->watchfiles->modal) (1.3.1)\n",
|
214 |
-
"Requirement already satisfied: dnspython>=2.0.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from email_validator>=2.0.0->fastapi->modal) (2.6.1)\n",
|
215 |
-
"Requirement already satisfied: hyperframe<7,>=6.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from h2<5,>=3.1.0->grpclib==0.4.7->modal) (6.0.1)\n",
|
216 |
-
"Requirement already satisfied: hpack<5,>=4.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from h2<5,>=3.1.0->grpclib==0.4.7->modal) (4.0.0)\n",
|
217 |
-
"Requirement already satisfied: httpcore==1.* in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from httpx>=0.23.0->fastapi->modal) (1.0.5)\n",
|
218 |
-
"Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from httpcore==1.*->httpx>=0.23.0->fastapi->modal) (0.14.0)\n",
|
219 |
-
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from jinja2>=2.11.2->fastapi->modal) (2.1.5)\n",
|
220 |
-
"Requirement already satisfied: mdurl~=0.1 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=12.0.0->modal) (0.1.2)\n",
|
221 |
-
"Requirement already satisfied: annotated-types>=0.4.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi->modal) (0.6.0)\n",
|
222 |
-
"Requirement already satisfied: pydantic-core==2.18.2 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi->modal) (2.18.2)\n",
|
223 |
-
"Requirement already satisfied: httptools>=0.5.0 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from uvicorn[standard]>=0.12.0->fastapi->modal) (0.6.1)\n",
|
224 |
-
"Requirement already satisfied: python-dotenv>=0.13 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from uvicorn[standard]>=0.12.0->fastapi->modal) (1.0.1)\n",
|
225 |
-
"Requirement already satisfied: pyyaml>=5.1 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from uvicorn[standard]>=0.12.0->fastapi->modal) (6.0.1)\n",
|
226 |
-
"Requirement already satisfied: websockets>=10.4 in c:\\users\\frede\\onedrive\\dokumenter\\master\\mlops\\mlops_mod-2\\.conda\\lib\\site-packages (from uvicorn[standard]>=0.12.0->fastapi->modal) (12.0)\n"
|
227 |
-
]
|
228 |
-
}
|
229 |
-
],
|
230 |
-
"source": [
|
231 |
-
"!pip install modal"
|
232 |
-
]
|
233 |
-
},
|
234 |
-
{
|
235 |
-
"cell_type": "code",
|
236 |
-
"execution_count": 5,
|
237 |
-
"metadata": {},
|
238 |
-
"outputs": [],
|
239 |
-
"source": [
|
240 |
-
"#from alpha_vantage.timeseries import TimeSeries\n",
|
241 |
-
"#import pandas as pd\n",
|
242 |
-
"\n",
|
243 |
-
"#load_dotenv()\n",
|
244 |
-
"\n",
|
245 |
-
"#api_key = os.environ.get('stocks_api') # Replace this with your actual API key\n",
|
246 |
-
"#ts = TimeSeries(key=api_key, output_format='pandas')\n",
|
247 |
-
"\n",
|
248 |
-
"# Fetch daily adjusted stock prices; adjust the symbol as needed\n",
|
249 |
-
"#data, meta_data = ts.get_daily(symbol='TSLA', outputsize='full')\n",
|
250 |
-
"\n",
|
251 |
-
"#print(data.head())"
|
252 |
-
]
|
253 |
-
},
|
254 |
-
{
|
255 |
-
"cell_type": "code",
|
256 |
-
"execution_count": 6,
|
257 |
-
"metadata": {},
|
258 |
-
"outputs": [],
|
259 |
-
"source": [
|
260 |
-
"#data.info()"
|
261 |
-
]
|
262 |
-
},
|
263 |
-
{
|
264 |
-
"cell_type": "code",
|
265 |
-
"execution_count": 7,
|
266 |
-
"metadata": {},
|
267 |
-
"outputs": [],
|
268 |
-
"source": [
|
269 |
-
"#meta_data"
|
270 |
-
]
|
271 |
-
},
|
272 |
-
{
|
273 |
-
"cell_type": "code",
|
274 |
-
"execution_count": 8,
|
275 |
-
"metadata": {},
|
276 |
-
"outputs": [],
|
277 |
-
"source": [
|
278 |
-
"# Define your file path and name\n",
|
279 |
-
"#file_path = 'TSLA_stock_price.csv' # Customize the path and filename\n",
|
280 |
-
"\n",
|
281 |
-
"# Save the DataFrame to CSV\n",
|
282 |
-
"#stock_data.to_csv(file_path)\n",
|
283 |
-
"\n",
|
284 |
-
"#print(f\"Data saved to {file_path}\")\n"
|
285 |
-
]
|
286 |
-
},
|
287 |
-
{
|
288 |
-
"cell_type": "code",
|
289 |
-
"execution_count": 9,
|
290 |
-
"metadata": {},
|
291 |
-
"outputs": [
|
292 |
-
{
|
293 |
-
"name": "stdout",
|
294 |
-
"output_type": "stream",
|
295 |
-
"text": [
|
296 |
-
" date 1. open 2. high 3. low 4. close 5. volume ticker\n",
|
297 |
-
"0 2024-05-03 182.10 184.78 178.4200 181.19 75491539.0 TSLA\n",
|
298 |
-
"1 2024-05-02 182.86 184.60 176.0200 180.01 89148041.0 TSLA\n",
|
299 |
-
"2 2024-05-01 182.00 185.86 179.0100 179.99 92829719.0 TSLA\n",
|
300 |
-
"3 2024-04-30 186.98 190.95 182.8401 183.28 127031787.0 TSLA\n",
|
301 |
-
"4 2024-04-29 188.42 198.87 184.5400 194.05 243869678.0 TSLA\n"
|
302 |
-
]
|
303 |
-
}
|
304 |
-
],
|
305 |
-
"source": [
|
306 |
-
"# Load and display the data from CSV to confirm\n",
|
307 |
-
"tsla_df = pd.read_csv('TSLA_stock_price.csv')\n",
|
308 |
-
"print(tsla_df.head())\n",
|
309 |
-
" "
|
310 |
-
]
|
311 |
-
},
|
312 |
-
{
|
313 |
-
"cell_type": "code",
|
314 |
-
"execution_count": 10,
|
315 |
-
"metadata": {},
|
316 |
-
"outputs": [
|
317 |
-
{
|
318 |
-
"name": "stdout",
|
319 |
-
"output_type": "stream",
|
320 |
-
"text": [
|
321 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
322 |
-
"\n",
|
323 |
-
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
|
324 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
325 |
-
]
|
326 |
-
}
|
327 |
-
],
|
328 |
-
"source": [
|
329 |
-
"api_key = os.environ.get('hopsworks_api')\n",
|
330 |
-
"project = hopsworks.login(api_key_value=api_key)\n",
|
331 |
-
"fs = project.get_feature_store()"
|
332 |
-
]
|
333 |
-
},
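The api_key in the cell above comes from the .env file loaded at the top of the notebook. A minimal sketch of that contract, assuming the same variable name this notebook uses (the .env contents shown are illustrative, not from the repo):

# .env (kept out of version control) is expected to contain a line like:
#   hopsworks_api=YOUR_HOPSWORKS_API_KEY
from dotenv import load_dotenv
import os, hopsworks

load_dotenv()  # populates os.environ from .env
project = hopsworks.login(api_key_value=os.environ.get('hopsworks_api'))
fs = project.get_feature_store()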
{
 "cell_type": "code",
 "execution_count": 11,
 "metadata": {},
 "outputs": [],
 "source": [
  "import re"
 ]
},
{
 "cell_type": "code",
 "execution_count": 12,
 "metadata": {},
 "outputs": [],
 "source": [
  "def clean_column_name(name):\n",
  "    # Remove all non-letter characters\n",
  "    cleaned_name = re.sub(r'[^a-zA-Z]', '', name)\n",
  "    return cleaned_name"
 ]
},
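For reference, a minimal standalone sketch of what this cleaning step does to the Alpha Vantage column names; the expected output is inferred from the dataframe shown later in this notebook:

# Standalone check of clean_column_name on the raw Alpha Vantage headers
import re

def clean_column_name(name):
    # Strips digits, dots, and spaces, keeping only letters
    return re.sub(r'[^a-zA-Z]', '', name)

cols = ['date', '1. open', '2. high', '3. low', '4. close', '5. volume', 'ticker']
print([clean_column_name(c) for c in cols])
# -> ['date', 'open', 'high', 'low', 'close', 'volume', 'ticker']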
{
 "cell_type": "code",
 "execution_count": 13,
 "metadata": {},
 "outputs": [
  {
   "data": {
    "text/plain": [
     "            date  1. open   2. high    3. low  4. close    5. volume ticker\n",
     "0     2024-05-03   182.10  184.7800  178.4200    181.19   75491539.0   TSLA\n",
     "1     2024-05-02   182.86  184.6000  176.0200    180.01   89148041.0   TSLA\n",
     "2     2024-05-01   182.00  185.8600  179.0100    179.99   92829719.0   TSLA\n",
     "3     2024-04-30   186.98  190.9500  182.8401    183.28  127031787.0   TSLA\n",
     "4     2024-04-29   188.42  198.8700  184.5400    194.05  243869678.0   TSLA\n",
     "...          ...      ...       ...       ...       ...          ...    ...\n",
     "3481  2010-07-06    20.00   20.0000   15.8300     16.11    6866900.0   TSLA\n",
     "3482  2010-07-02    23.00   23.1000   18.7100     19.20    5139800.0   TSLA\n",
     "3483  2010-07-01    25.00   25.9200   20.2700     21.96    8218800.0   TSLA\n",
     "3484  2010-06-30    25.79   30.4192   23.3000     23.83   17187100.0   TSLA\n",
     "3485  2010-06-29    19.00   25.0000   17.5400     23.89   18766300.0   TSLA\n",
     "\n",
     "[3486 rows x 7 columns]"
    ]
   },
   "execution_count": 13,
   "metadata": {},
   "output_type": "execute_result"
  }
 ],
 "source": [
  "tsla_df"
 ]
},
{
 "cell_type": "code",
 "execution_count": 14,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Assuming 'tsla_df' is your DataFrame\n",
  "tsla_df.columns = [clean_column_name(col) for col in tsla_df.columns]"
 ]
},
{
 "cell_type": "code",
 "execution_count": 15,
 "metadata": {},
 "outputs": [
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')\n"
   ]
  }
 ],
 "source": [
  "print(tsla_df.columns)"
 ]
},
{
 "cell_type": "code",
 "execution_count": 16,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Convert the \"date\" column to timestamp\n",
  "tsla_df['date'] = pd.to_datetime(tsla_df['date'])"
 ]
},
{
 "cell_type": "code",
 "execution_count": 17,
 "metadata": {},
 "outputs": [
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "2024-05-06 13:43:00,985 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n"
   ]
  }
 ],
 "source": [
  "# Define a feature group\n",
  "tesla_fg = fs.get_or_create_feature_group(\n",
  "    name=\"tesla_stock\",\n",
  "    description=\"Tesla stock dataset from alpha vantage\",\n",
  "    version=1,\n",
  "    primary_key=[\"ticker\"],\n",
  "    event_time=['date'],\n",
  "    online_enabled=False,\n",
  ")"
 ]
},
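The DeprecationWarning above comes from passing event_time as a single-element list. A minimal sketch of the same feature-group definition using the string form the warning itself recommends (everything else unchanged):

# Same feature group, but with event_time as a plain feature-name string
tesla_fg = fs.get_or_create_feature_group(
    name="tesla_stock",
    description="Tesla stock dataset from alpha vantage",
    version=1,
    primary_key=["ticker"],
    event_time="date",   # string instead of ['date'] silences the DeprecationWarning
    online_enabled=False,
)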
{
 "cell_type": "code",
 "execution_count": 18,
 "metadata": {},
 "outputs": [
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "Feature Group created successfully, explore it at \n",
    "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/786781\n"
   ]
  },
  {
   "data": {
    "text/plain": [
     "Uploading Dataframe: 0.00% |          | Rows 0/3486 | Elapsed Time: 00:00 | Remaining Time: ?"
    ]
   },
   "metadata": {},
   "output_type": "display_data"
  },
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "Launching job: tesla_stock_1_offline_fg_materialization\n",
    "Job started successfully, you can follow the progress at \n",
    "https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stock_1_offline_fg_materialization/executions\n"
   ]
  },
  {
   "data": {
    "text/plain": [
     "(<hsfs.core.job.Job at 0x19cffe27490>, None)"
    ]
   },
   "execution_count": 18,
   "metadata": {},
   "output_type": "execute_result"
  }
 ],
 "source": [
  "tesla_fg.insert(tsla_df, write_options={\"wait_for_job\": False})"
 ]
},
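Because wait_for_job is False here, insert() returns as soon as the materialization job is launched; the offline data only becomes queryable once that job finishes. A hedged sketch of how one might block on the job and read the data back instead (using the feature group and dataframe from the cells above; the verification step is an addition, not part of this notebook):

# Block until the offline materialization job completes, then read back a sample
tesla_fg.insert(tsla_df, write_options={"wait_for_job": True})
df_check = tesla_fg.read()   # pulls the feature group from the offline store
print(df_check.shape)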
{
 "cell_type": "code",
 "execution_count": 19,
 "metadata": {},
 "outputs": [],
 "source": [
  "news_df = pd.read_csv('news_articles_ema.csv')"
 ]
},
{
 "cell_type": "code",
 "execution_count": 20,
 "metadata": {},
 "outputs": [],
 "source": [
  "news_df_updated = news_df.drop(columns=['exp_mean_7_days'])"
 ]
},
{
 "cell_type": "code",
 "execution_count": 21,
 "metadata": {},
 "outputs": [],
 "source": [
  "news_df_updated['date'] = pd.to_datetime(news_df_updated['date'])"
 ]
},
{
 "cell_type": "code",
 "execution_count": 22,
 "metadata": {},
 "outputs": [
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "2024-05-06 13:43:12,343 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n"
   ]
  }
 ],
 "source": [
  "news_sentiment_fg = fs.get_or_create_feature_group(\n",
  "    name='news_sentiment_updated',\n",
  "    description='News sentiment from Polygon',\n",
  "    version=1,\n",
  "    primary_key=['ticker'],\n",
  "    event_time=['date'],\n",
  "    online_enabled=False,\n",
  ")"
 ]
},
{
 "cell_type": "code",
 "execution_count": 23,
 "metadata": {},
 "outputs": [
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "Feature Group created successfully, explore it at \n",
    "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/787796\n"
   ]
  },
  {
   "data": {
    "text/plain": [
     "Uploading Dataframe: 0.00% |          | Rows 0/66 | Elapsed Time: 00:00 | Remaining Time: ?"
    ]
   },
   "metadata": {},
   "output_type": "display_data"
  },
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "Launching job: news_sentiment_updated_1_offline_fg_materialization\n",
    "Job started successfully, you can follow the progress at \n",
    "https://c.app.hopsworks.ai/p/693399/jobs/named/news_sentiment_updated_1_offline_fg_materialization/executions\n"
   ]
  },
  {
   "data": {
    "text/plain": [
     "(<hsfs.core.job.Job at 0x19c811c2e90>, None)"
    ]
   },
   "execution_count": 23,
   "metadata": {},
   "output_type": "execute_result"
  }
 ],
 "source": [
  "news_sentiment_fg.insert(news_df_updated)"
 ]
}
],
"metadata": {
 "kernelspec": {
  "display_name": "base",
  "language": "python",
  "name": "python3"
 },
 "language_info": {
  "codemirror_mode": {
   "name": "ipython",
   "version": 3
  },
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
  "version": "3.11.9"
 },
 "orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
feature_view_freddie.py
DELETED
@@ -1,95 +0,0 @@
# %%
# Import necessary libraries
import pandas as pd  # For data manipulation using DataFrames
import numpy as np  # For numerical operations
import matplotlib.pyplot as plt  # For data visualization
import os  # For operating system-related tasks
import joblib  # For saving and loading models
import hopsworks  # For getting access to hopsworks

# Import specific modules from scikit-learn
from sklearn.preprocessing import StandardScaler, OneHotEncoder  # For data preprocessing
from sklearn.metrics import accuracy_score  # For evaluating model accuracy

# %%
from feature_pipeline import tesla_fg
from feature_pipeline import news_sentiment_fg

# %%
from dotenv import load_dotenv
import os

load_dotenv()

# %%
api_key = os.environ.get('hopsworks_api')
project = hopsworks.login(api_key_value=api_key)
fs = project.get_feature_store()

# %%
def create_stocks_feature_view(fs, version):

    # Loading in the feature groups
    tesla_fg = fs.get_feature_group('tesla_stock', version=1)
    news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=1)

    # Define the query
    ds_query = tesla_fg.select(['date', 'open', 'ticker'])\
        .join(news_sentiment_fg.select(['date', 'sentiment']))

    # Create the feature view
    feature_view = fs.create_feature_view(
        name='tesla_stocks_fv',
        query=ds_query,
        labels=['ticker']
    )

    return feature_view, tesla_fg

# %%
try:
    feature_view = fs.get_feature_view("tesla_stocks_fv", version=1)
    tesla_fg = fs.get_feature_group('tesla_stock', version=1)
except:
    feature_view, tesla_fg = create_stocks_feature_view(fs, 1)

# %%
def fix_data_from_feature_view(df, start_date, end_date):
    df = df.sort_values("date")
    df = df.reset_index()
    df = df.drop(columns=["index"])

    # Create a boolean mask for rows that fall within the date range
    mask = (pd.to_datetime(df['date']) >= pd.to_datetime(start_date)) & (pd.to_datetime(df['date']) <= pd.to_datetime(end_date))
    len_df = np.shape(df)
    df = df[mask]  # Use the boolean mask to filter the DataFrame
    print('From shape {} to {} after cropping to given date range: {} to {}'.format(len_df, np.shape(df), start_date, end_date))

    return df

# %%
#def create_stocks_feature_view(fs, version):

    #Loading in the feature groups
#    tesla_fg = fs.get_feature_group('tesla_stock', version = 3)
#    news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version = 2)

#    ds_query = tesla_fg.select(['date','open', 'ticker'])\
#        .join(news_sentiment_fg.select_except(['ticker','time', 'amp_url', 'image_url']))

#    return (fs.create_tesla_feature_view(
#        name = 'tsla_stocks_fv',
#        query = ds_query,
#        labels=['ticker']
#    ), tesla_fg)

# %%
#try:
#    feature_view = fs.get_feature_view("tsla_stocks_fv", version=1)
#    tesla_fg = fs.get_feature_group('tesla_stock', version=3)
#except:
#    feature_view, tesla_fg = create_stocks_feature_view(fs, 1)
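A short sketch of how the feature view built in this deleted script could be consumed, pairing create_stocks_feature_view with the date-cropping helper above. get_batch_data is the standard hsfs call for pulling a feature view's joined rows, but the read path here is an assumption, since the script itself never reads the view back:

# Hypothetical usage of the helpers defined in this file
feature_view, tesla_fg = create_stocks_feature_view(fs, version=1)

df = feature_view.get_batch_data()  # joined tesla_stock + news sentiment rows
df = fix_data_from_feature_view(df, '2022-04-01', '2024-04-01')  # crop to a date window
print(df.head())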
historical_stock.ipynb
DELETED
@@ -1,257 +0,0 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'modal'",
     "output_type": "error",
     "traceback": [
      "---------------------------------------------------------------------------",
      "ModuleNotFoundError                       Traceback (most recent call last)",
      "Cell In[1], line 7",
      "      5 import hopsworks",
      "      6 import re",
      "----> 7 import modal",
      "      8 #preprocessing",
      "      9 import requests",
      "",
      "ModuleNotFoundError: No module named 'modal'"
     ]
    }
   ],
   "source": [
    "from dotenv import load_dotenv\n",
    "import os\n",
    "from alpha_vantage.timeseries import TimeSeries\n",
    "import pandas as pd\n",
    "import hopsworks\n",
    "import re\n",
    "import modal\n",
    "#preprocessing\n",
    "import requests\n",
    "import json\n",
    "#import pandas_market_calendars as mcal\n",
    "import datetime\n",
    "import numpy as np\n",
    "from datetime import timedelta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            1. open  2. high    3. low  4. close    5. volume ticker\n",
      "date                                                                \n",
      "2024-05-03   182.10   184.78  178.4200    181.19   75491539.0   TSLA\n",
      "2024-05-02   182.86   184.60  176.0200    180.01   89148041.0   TSLA\n",
      "2024-05-01   182.00   185.86  179.0100    179.99   92829719.0   TSLA\n",
      "2024-04-30   186.98   190.95  182.8401    183.28  127031787.0   TSLA\n",
      "2024-04-29   188.42   198.87  184.5400    194.05  243869678.0   TSLA\n"
     ]
    }
   ],
   "source": [
    "load_dotenv()\n",
    "\n",
    "api_key = os.environ.get('stocks_api')  # Replace this with your actual API key\n",
    "ts = TimeSeries(key=api_key, output_format='pandas')\n",
    "\n",
    "def fetch_stock_prices(symbol):\n",
    "    # Fetch daily adjusted stock prices; adjust the symbol as needed\n",
    "    data, meta_data = ts.get_daily(symbol=symbol, outputsize='full')\n",
    "\n",
    "    # Add a new column named 'ticker' and fill it with the ticker name\n",
    "    data['ticker'] = symbol\n",
    "\n",
    "    return data\n",
    "\n",
    "# Example usage\n",
    "symbol = 'TSLA'\n",
    "stock_data = fetch_stock_prices(symbol)\n",
    "print(stock_data.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_tsla_history():\n",
    "\n",
    "    start_date = datetime.datetime.strptime('2015-07-16', \"%Y-%m-%d\")\n",
    "    end_date = datetime.datetime.strptime('2023-01-05', \"%Y-%m-%d\")\n",
    "\n",
    "    # Get the TSLA stock data from yfinance\n",
    "    tsla = Ticker(\"TSLA\")\n",
    "    # info = tsla.info\n",
    "\n",
    "    # get historical market data\n",
    "    data = tsla.history(start=start_date, end=end_date)\n",
    "\n",
    "    # drop some columns\n",
    "    tesla_df = data.drop(columns=['Dividends', 'Stock Splits'])\n",
    "    tesla_df.index = tesla_df.index.strftime('%Y-%m-%d')\n",
    "\n",
    "    print('Number of business days included in data set: ', np.shape(tesla_df))\n",
    "\n",
    "    # Create an array of all dates in the specified period\n",
    "    all_dates = np.array([start_date + datetime.timedelta(days=i) for i in range((end_date - start_date).days)])\n",
    "    all_dates = [d.strftime('%Y-%m-%d') for d in all_dates]\n",
    "\n",
    "    # Use setdiff1d() to find the non-business days\n",
    "    isBusinessDay, _ = extract_business_day(start_date='2015-07-16', end_date='2023-01-04')\n",
    "    non_business_days = np.setdiff1d(all_dates, isBusinessDay)\n",
    "\n",
    "    # Add nan-values to the non-business days\n",
    "    print('Add {} non business days with NaN-values'.format(len(non_business_days)))\n",
    "    for d in non_business_days:\n",
    "        tesla_df.loc[d, :] = [np.nan, np.nan, np.nan, np.nan, np.nan]\n",
    "\n",
    "    # sort index (dates)\n",
    "    tesla_df = tesla_df.sort_index()\n",
    "\n",
    "    # move \"date\"-index into its own column\n",
    "    tesla_df = tesla_df.reset_index()\n",
    "\n",
    "    # Rename column 'Date' to 'date'\n",
    "    tesla_df = tesla_df.rename(columns={'Date': 'date'})\n",
    "    print('Final size of dataframe', np.shape(tesla_df))\n",
    "\n",
    "    # Write the merged dataframe to a CSV file\n",
    "    start_date = '2022-04-01'\n",
    "    end_date = '2024-04-01'\n",
    "    save_path = \"data/stock/tesla_{}-{}.csv\".format(start_date, end_date)\n",
    "\n",
    "    print('Save at :', save_path)\n",
    "    tesla_df.to_csv(save_path, index=False)\n",
    "\n",
    "    return tesla_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_business_day(start_date, end_date):\n",
    "    \"\"\"\n",
    "    Given a start_date and end_date.\n",
    "\n",
    "    `Returns`:\n",
    "\n",
    "    isBusinessDay: list of str (with all dates being business days)\n",
    "    is_open: boolean list\n",
    "        e.g. is_open = [1,0,...,1] means that start_date = open, day after start_date = closed, and end_date = open\n",
    "    \"\"\"\n",
    "\n",
    "    # Save for later\n",
    "    end_date_save = end_date\n",
    "\n",
    "    # Get the NYSE calendar\n",
    "    cal = mcal.get_calendar('NYSE')\n",
    "\n",
    "    # Get the NYSE calendar's open and close times for the specified period\n",
    "    schedule = cal.schedule(start_date=start_date, end_date=end_date)\n",
    "\n",
    "    # Only need a list of dates when it's open (not open and close times)\n",
    "    isBusinessDay = np.array(schedule.market_open.dt.strftime('%Y-%m-%d'))\n",
    "\n",
    "    # Go over all days:\n",
    "    delta = datetime.timedelta(days=1)\n",
    "    start_date = datetime.datetime.strptime(start_date, \"%Y-%m-%d\")\n",
    "    end_date = datetime.datetime.strptime(end_date, \"%Y-%m-%d\")\n",
    "\n",
    "    # Extract days from the timedelta object\n",
    "    num_days = (end_date - start_date).days + 1\n",
    "\n",
    "    # Create boolean array for days being open (1) and closed (0)\n",
    "    is_open = np.zeros(num_days)\n",
    "\n",
    "    # iterate over range of dates\n",
    "    current_BusinessDay = isBusinessDay[0]\n",
    "    count_dates = 0\n",
    "    next_BusinessDay = 0\n",
    "\n",
    "    while (start_date <= end_date):\n",
    "\n",
    "        if start_date.strftime('%Y-%m-%d') == current_BusinessDay:\n",
    "            is_open[count_dates] = True\n",
    "\n",
    "            if current_BusinessDay == end_date_save or current_BusinessDay == isBusinessDay[-1]:\n",
    "                break\n",
    "            else:\n",
    "                next_BusinessDay += 1\n",
    "                current_BusinessDay = isBusinessDay[next_BusinessDay]\n",
    "        else:\n",
    "            is_open[count_dates] = False\n",
    "\n",
    "        count_dates += 1\n",
    "        start_date += delta\n",
    "\n",
    "    print(np.shape(is_open))\n",
    "\n",
    "    return isBusinessDay, is_open"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data saved to TSLA_stock_price.csv\n"
     ]
    }
   ],
   "source": [
    "# Define your file path and name\n",
    "file_path = 'TSLA_stock_price.csv'  # Customize the path and filename\n",
    "\n",
    "# Save the DataFrame to CSV\n",
    "stock_data.to_csv(file_path)\n",
    "\n",
    "print(f\"Data saved to {file_path}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
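The manual is_open loop in extract_business_day above is hard to follow; the same business-day mask can be sketched far more directly with pandas_market_calendars (now listed in requirements.txt) and a plain membership test. This is an illustrative simplification, not the notebook's original code:

import numpy as np
import pandas as pd
import pandas_market_calendars as mcal

start_date, end_date = '2015-07-16', '2023-01-04'

# Trading days according to the NYSE calendar
schedule = mcal.get_calendar('NYSE').schedule(start_date=start_date, end_date=end_date)
business_days = schedule.index.strftime('%Y-%m-%d')

# Boolean mask over every calendar day in the range: 1 = market open, 0 = closed
all_days = pd.date_range(start_date, end_date, freq='D').strftime('%Y-%m-%d')
is_open = np.isin(all_days, business_days).astype(int)

print(len(business_days), 'business days out of', len(all_days), 'calendar days')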
requirements.txt
CHANGED
@@ -19,3 +19,4 @@ textblob
 great_expectations==0.18.12
 prophet
 tensorflow
+pandas_market_calendars
training_pipeline.ipynb
DELETED
@@ -1,167 +0,0 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import hopsworks\n",
    "from dotenv import load_dotenv\n",
    "import os\n",
    "\n",
    "load_dotenv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "\n",
      "Sample data from the feature view:\n",
      "<class 'tuple'>\n",
      "(                        date     open  sentiment\n",
      "0    2023-06-26T00:00:00.000Z  250.065   0.119444\n",
      "1    2023-07-25T00:00:00.000Z  272.380   0.119444\n",
      "2    2023-01-10T00:00:00.000Z  121.070   0.102207\n",
      "3    2023-05-11T00:00:00.000Z  168.700   0.141296\n",
      "4    2023-08-01T00:00:00.000Z  266.260   0.011111\n",
      "..                        ...      ...        ...\n",
      "464  2022-12-22T00:00:00.000Z  136.000   0.102207\n",
      "465  2023-08-23T00:00:00.000Z  229.340   0.024046\n",
      "466  2022-09-08T00:00:00.000Z  281.300   0.087306\n",
      "467  2023-07-06T00:00:00.000Z  278.090   0.119444\n",
      "468  2023-10-27T00:00:00.000Z  210.600   0.164868\n",
      "\n",
      "[469 rows x 3 columns],     ticker\n",
      "0     TSLA\n",
      "1     TSLA\n",
      "2     TSLA\n",
      "3     TSLA\n",
      "4     TSLA\n",
      "..     ...\n",
      "464   TSLA\n",
      "465   TSLA\n",
      "466   TSLA\n",
      "467   TSLA\n",
      "468   TSLA\n",
      "\n",
      "[469 rows x 1 columns])\n"
     ]
    }
   ],
   "source": [
    "import hsfs\n",
    "\n",
    "# Connection setup\n",
    "# Connect to Hopsworks\n",
    "api_key = os.getenv('hopsworks_api')\n",
    "connection = hsfs.connection()\n",
    "fs = connection.get_feature_store()\n",
    "\n",
    "# Get feature view\n",
    "feature_view = fs.get_feature_view(\n",
    "    name='tesla_stocks_fv',\n",
    "    version=1\n",
    ")\n",
    "td_version, td_job = feature_view.create_train_test_split(\n",
    "    description='tesla and news sentiment training dataset',\n",
    "    data_format=\"csv\",\n",
    "    test_size=0.2,\n",
    "    coalesce=True,\n",
    "    statistics_config={\n",
    "        \"enabled\": True,\n",
    "        \"histograms\": False,\n",
    "        \"correlations\": False\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(                        date     open  sentiment\n",
       " 0    2023-06-26T00:00:00.000Z  250.065   0.119444\n",
       " 1    2023-07-25T00:00:00.000Z  272.380   0.119444\n",
       " 2    2023-01-10T00:00:00.000Z  121.070   0.102207\n",
       " 3    2023-05-11T00:00:00.000Z  168.700   0.141296\n",
       " 4    2023-08-01T00:00:00.000Z  266.260   0.011111\n",
       " ..                        ...      ...        ...\n",
       " 464  2022-12-22T00:00:00.000Z  136.000   0.102207\n",
       " 465  2023-08-23T00:00:00.000Z  229.340   0.024046\n",
       " 466  2022-09-08T00:00:00.000Z  281.300   0.087306\n",
       " 467  2023-07-06T00:00:00.000Z  278.090   0.119444\n",
       " 468  2023-10-27T00:00:00.000Z  210.600   0.164868\n",
       " \n",
       " [469 rows x 3 columns],\n",
       "     ticker\n",
       " 0     TSLA\n",
       " 1     TSLA\n",
       " 2     TSLA\n",
       " 3     TSLA\n",
       " 4     TSLA\n",
       " ..     ...\n",
       " 464   TSLA\n",
       " 465   TSLA\n",
       " 466   TSLA\n",
       " 467   TSLA\n",
       " 468   TSLA\n",
       " \n",
       " [469 rows x 1 columns])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample_data"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
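For completeness, a hedged sketch of how the training dataset materialized above would normally be read back in hsfs; get_train_test_split is the documented counterpart of create_train_test_split, though this deleted notebook never shows the call itself:

# Read back the split that create_train_test_split materialized
X_train, X_test, y_train, y_test = feature_view.get_train_test_split(
    training_dataset_version=td_version
)
print(X_train.shape, X_test.shape)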