Spaces:
No application file
No application file
Check_1
Browse files- feature_pipeline.ipynb +535 -0
- feature_preprocessing.ipynb +116 -0
- news_experimenting.ipynb +191 -36
- news_experimenting1.ipynb +0 -0
feature_pipeline.ipynb
CHANGED
@@ -0,0 +1,535 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 17,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"from dotenv import load_dotenv\n",
|
10 |
+
"import os "
|
11 |
+
]
|
12 |
+
},
|
13 |
+
{
|
14 |
+
"cell_type": "code",
|
15 |
+
"execution_count": 18,
|
16 |
+
"metadata": {},
|
17 |
+
"outputs": [
|
18 |
+
{
|
19 |
+
"name": "stdout",
|
20 |
+
"output_type": "stream",
|
21 |
+
"text": [
|
22 |
+
" 1. open 2. high 3. low 4. close 5. volume\n",
|
23 |
+
"date \n",
|
24 |
+
"2024-04-26 168.85 172.12 166.3700 168.29 109815725.0\n",
|
25 |
+
"2024-04-25 158.96 170.88 158.3600 170.18 126427521.0\n",
|
26 |
+
"2024-04-24 162.84 167.97 157.5100 162.13 181178020.0\n",
|
27 |
+
"2024-04-23 143.33 147.26 141.1100 144.68 124545104.0\n",
|
28 |
+
"2024-04-22 140.56 144.44 138.8025 142.05 107097564.0\n"
|
29 |
+
]
|
30 |
+
}
|
31 |
+
],
|
32 |
+
"source": [
|
33 |
+
"from alpha_vantage.timeseries import TimeSeries\n",
|
34 |
+
"import pandas as pd\n",
|
35 |
+
"\n",
|
36 |
+
"load_dotenv()\n",
|
37 |
+
"\n",
|
38 |
+
"api_key = os.environ.get('stocks_api') # Alpha Vantage key, read from the 'stocks_api' environment variable (e.g. populated by load_dotenv() above)\n",
|
39 |
+
"ts = TimeSeries(key=api_key, output_format='pandas')\n",
|
40 |
+
"\n",
|
41 |
+
"# Fetch the full history of daily (unadjusted) open/high/low/close/volume prices; change the symbol as needed\n",
|
42 |
+
"data, meta_data = ts.get_daily(symbol='TSLA', outputsize='full')\n",
|
43 |
+
"\n",
|
44 |
+
"print(data.head())"
|
45 |
+
]
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"cell_type": "code",
|
49 |
+
"execution_count": null,
|
50 |
+
"metadata": {},
|
51 |
+
"outputs": [
|
52 |
+
{
|
53 |
+
"name": "stdout",
|
54 |
+
"output_type": "stream",
|
55 |
+
"text": [
|
56 |
+
"Feature Group created successfully, explore it at \n",
|
57 |
+
"https://c.app.hopsworks.ai:443/p/549016/fs/544838/fg/752979\n"
|
58 |
+
]
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"data": {
|
62 |
+
"application/vnd.jupyter.widget-view+json": {
|
63 |
+
"model_id": "394c6ab7da624ed388df0b9b8bff469a",
|
64 |
+
"version_major": 2,
|
65 |
+
"version_minor": 0
|
66 |
+
},
|
67 |
+
"text/plain": [
|
68 |
+
"Uploading Dataframe: 0.00% | | Rows 0/3479 | Elapsed Time: 00:00 | Remaining Time: ?"
|
69 |
+
]
|
70 |
+
},
|
71 |
+
"metadata": {},
|
72 |
+
"output_type": "display_data"
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"name": "stdout",
|
76 |
+
"output_type": "stream",
|
77 |
+
"text": [
|
78 |
+
"Launching job: tsla_stock_1_offline_fg_materialization\n",
|
79 |
+
"Job started successfully, you can follow the progress at \n",
|
80 |
+
"https://c.app.hopsworks.ai/p/549016/jobs/named/tsla_stock_1_offline_fg_materialization/executions\n"
|
81 |
+
]
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"data": {
|
85 |
+
"text/plain": [
|
86 |
+
"(<hsfs.core.job.Job at 0x158c80fd0>, None)"
|
87 |
+
]
|
88 |
+
},
|
89 |
+
"metadata": {},
|
90 |
+
"output_type": "display_data"
|
91 |
+
}
|
92 |
+
],
|
93 |
+
"source": []
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"cell_type": "code",
|
97 |
+
"execution_count": 3,
|
98 |
+
"metadata": {},
|
99 |
+
"outputs": [
|
100 |
+
{
|
101 |
+
"name": "stdout",
|
102 |
+
"output_type": "stream",
|
103 |
+
"text": [
|
104 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
105 |
+
"DatetimeIndex: 3479 entries, 2024-04-24 to 2010-06-29\n",
|
106 |
+
"Data columns (total 5 columns):\n",
|
107 |
+
" # Column Non-Null Count Dtype \n",
|
108 |
+
"--- ------ -------------- ----- \n",
|
109 |
+
" 0 1. open 3479 non-null float64\n",
|
110 |
+
" 1 2. high 3479 non-null float64\n",
|
111 |
+
" 2 3. low 3479 non-null float64\n",
|
112 |
+
" 3 4. close 3479 non-null float64\n",
|
113 |
+
" 4 5. volume 3479 non-null float64\n",
|
114 |
+
"dtypes: float64(5)\n",
|
115 |
+
"memory usage: 163.1 KB\n"
|
116 |
+
]
|
117 |
+
}
|
118 |
+
],
|
119 |
+
"source": [
|
120 |
+
"data.info()"
|
121 |
+
]
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"cell_type": "code",
|
125 |
+
"execution_count": 4,
|
126 |
+
"metadata": {},
|
127 |
+
"outputs": [
|
128 |
+
{
|
129 |
+
"data": {
|
130 |
+
"text/plain": [
|
131 |
+
"{'1. Information': 'Daily Prices (open, high, low, close) and Volumes',\n",
|
132 |
+
" '2. Symbol': 'TSLA',\n",
|
133 |
+
" '3. Last Refreshed': '2024-04-24',\n",
|
134 |
+
" '4. Output Size': 'Full size',\n",
|
135 |
+
" '5. Time Zone': 'US/Eastern'}"
|
136 |
+
]
|
137 |
+
},
|
138 |
+
"execution_count": 4,
|
139 |
+
"metadata": {},
|
140 |
+
"output_type": "execute_result"
|
141 |
+
}
|
142 |
+
],
|
143 |
+
"source": [
|
144 |
+
"meta_data"
|
145 |
+
]
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"cell_type": "code",
|
149 |
+
"execution_count": 5,
|
150 |
+
"metadata": {},
|
151 |
+
"outputs": [
|
152 |
+
{
|
153 |
+
"name": "stdout",
|
154 |
+
"output_type": "stream",
|
155 |
+
"text": [
|
156 |
+
"Data saved to /Users/manos/Documents/BDS/MLops_mod/TSLA_stock_price.csv\n"
|
157 |
+
]
|
158 |
+
}
|
159 |
+
],
|
160 |
+
"source": [
|
161 |
+
"# Define your file path and name\n",
|
162 |
+
"file_path = '/Users/manos/Documents/BDS/MLops_mod/TSLA_stock_price.csv' # Customize the path and filename\n",
|
163 |
+
"\n",
|
164 |
+
"# Save the DataFrame to CSV\n",
|
165 |
+
"data.to_csv(file_path)\n",
|
166 |
+
"\n",
|
167 |
+
"print(f\"Data saved to {file_path}\")\n"
|
168 |
+
]
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"cell_type": "code",
|
172 |
+
"execution_count": 6,
|
173 |
+
"metadata": {},
|
174 |
+
"outputs": [
|
175 |
+
{
|
176 |
+
"name": "stdout",
|
177 |
+
"output_type": "stream",
|
178 |
+
"text": [
|
179 |
+
" date 1. open 2. high 3. low 4. close 5. volume\n",
|
180 |
+
"0 2024-04-24 162.84 167.97 157.5100 162.13 181178020.0\n",
|
181 |
+
"1 2024-04-23 143.33 147.26 141.1100 144.68 124545104.0\n",
|
182 |
+
"2 2024-04-22 140.56 144.44 138.8025 142.05 107097564.0\n",
|
183 |
+
"3 2024-04-19 148.97 150.94 146.2200 147.05 87074500.0\n",
|
184 |
+
"4 2024-04-18 151.25 152.20 148.7000 149.93 96098830.0\n"
|
185 |
+
]
|
186 |
+
}
|
187 |
+
],
|
188 |
+
"source": [
|
189 |
+
"# Load and display the data from CSV to confirm\n",
|
190 |
+
"tsla_df = pd.read_csv(file_path)\n",
|
191 |
+
"print(tsla_df.head())\n"
|
192 |
+
]
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"cell_type": "code",
|
196 |
+
"execution_count": 7,
|
197 |
+
"metadata": {},
|
198 |
+
"outputs": [
|
199 |
+
{
|
200 |
+
"name": "stdout",
|
201 |
+
"output_type": "stream",
|
202 |
+
"text": [
|
203 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
204 |
+
]
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"name": "stderr",
|
208 |
+
"output_type": "stream",
|
209 |
+
"text": [
|
210 |
+
"\n",
|
211 |
+
"\n",
|
212 |
+
"UserWarning: The installed hopsworks client version 3.4.4 may not be compatible with the connected Hopsworks backend version 3.7.1. \n",
|
213 |
+
"To ensure compatibility please install the latest bug fix release matching the minor version of your backend (3.7) by running 'pip install hopsworks==3.7.*'\n"
|
214 |
+
]
|
215 |
+
},
|
216 |
+
{
|
217 |
+
"name": "stdout",
|
218 |
+
"output_type": "stream",
|
219 |
+
"text": [
|
220 |
+
"\n",
|
221 |
+
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/549016\n",
|
222 |
+
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
223 |
+
]
|
224 |
+
}
|
225 |
+
],
|
226 |
+
"source": [
|
227 |
+
"import hopsworks\n",
|
228 |
+
"\n",
|
229 |
+
"project = hopsworks.login()\n",
|
230 |
+
"fs = project.get_feature_store()\n"
|
231 |
+
]
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"cell_type": "code",
|
235 |
+
"execution_count": 8,
|
236 |
+
"metadata": {},
|
237 |
+
"outputs": [],
|
238 |
+
"source": [
|
239 |
+
"import re "
|
240 |
+
]
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"cell_type": "code",
|
244 |
+
"execution_count": 9,
|
245 |
+
"metadata": {},
|
246 |
+
"outputs": [],
|
247 |
+
"source": [
|
248 |
+
"def clean_column_name(name):\n",
|
249 |
+
" # Remove all non-letter characters\n",
|
250 |
+
" cleaned_name = re.sub(r'[^a-zA-Z]', '', name)\n",
|
251 |
+
" return cleaned_name\n"
|
252 |
+
]
|
253 |
+
},
|
254 |
+
{
|
255 |
+
"cell_type": "code",
|
256 |
+
"execution_count": 10,
|
257 |
+
"metadata": {},
|
258 |
+
"outputs": [
|
259 |
+
{
|
260 |
+
"data": {
|
261 |
+
"text/html": [
|
262 |
+
"<div>\n",
|
263 |
+
"<style scoped>\n",
|
264 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
265 |
+
" vertical-align: middle;\n",
|
266 |
+
" }\n",
|
267 |
+
"\n",
|
268 |
+
" .dataframe tbody tr th {\n",
|
269 |
+
" vertical-align: top;\n",
|
270 |
+
" }\n",
|
271 |
+
"\n",
|
272 |
+
" .dataframe thead th {\n",
|
273 |
+
" text-align: right;\n",
|
274 |
+
" }\n",
|
275 |
+
"</style>\n",
|
276 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
277 |
+
" <thead>\n",
|
278 |
+
" <tr style=\"text-align: right;\">\n",
|
279 |
+
" <th></th>\n",
|
280 |
+
" <th>date</th>\n",
|
281 |
+
" <th>1. open</th>\n",
|
282 |
+
" <th>2. high</th>\n",
|
283 |
+
" <th>3. low</th>\n",
|
284 |
+
" <th>4. close</th>\n",
|
285 |
+
" <th>5. volume</th>\n",
|
286 |
+
" </tr>\n",
|
287 |
+
" </thead>\n",
|
288 |
+
" <tbody>\n",
|
289 |
+
" <tr>\n",
|
290 |
+
" <th>0</th>\n",
|
291 |
+
" <td>2024-04-24</td>\n",
|
292 |
+
" <td>162.84</td>\n",
|
293 |
+
" <td>167.9700</td>\n",
|
294 |
+
" <td>157.5100</td>\n",
|
295 |
+
" <td>162.13</td>\n",
|
296 |
+
" <td>181178020.0</td>\n",
|
297 |
+
" </tr>\n",
|
298 |
+
" <tr>\n",
|
299 |
+
" <th>1</th>\n",
|
300 |
+
" <td>2024-04-23</td>\n",
|
301 |
+
" <td>143.33</td>\n",
|
302 |
+
" <td>147.2600</td>\n",
|
303 |
+
" <td>141.1100</td>\n",
|
304 |
+
" <td>144.68</td>\n",
|
305 |
+
" <td>124545104.0</td>\n",
|
306 |
+
" </tr>\n",
|
307 |
+
" <tr>\n",
|
308 |
+
" <th>2</th>\n",
|
309 |
+
" <td>2024-04-22</td>\n",
|
310 |
+
" <td>140.56</td>\n",
|
311 |
+
" <td>144.4400</td>\n",
|
312 |
+
" <td>138.8025</td>\n",
|
313 |
+
" <td>142.05</td>\n",
|
314 |
+
" <td>107097564.0</td>\n",
|
315 |
+
" </tr>\n",
|
316 |
+
" <tr>\n",
|
317 |
+
" <th>3</th>\n",
|
318 |
+
" <td>2024-04-19</td>\n",
|
319 |
+
" <td>148.97</td>\n",
|
320 |
+
" <td>150.9400</td>\n",
|
321 |
+
" <td>146.2200</td>\n",
|
322 |
+
" <td>147.05</td>\n",
|
323 |
+
" <td>87074500.0</td>\n",
|
324 |
+
" </tr>\n",
|
325 |
+
" <tr>\n",
|
326 |
+
" <th>4</th>\n",
|
327 |
+
" <td>2024-04-18</td>\n",
|
328 |
+
" <td>151.25</td>\n",
|
329 |
+
" <td>152.2000</td>\n",
|
330 |
+
" <td>148.7000</td>\n",
|
331 |
+
" <td>149.93</td>\n",
|
332 |
+
" <td>96098830.0</td>\n",
|
333 |
+
" </tr>\n",
|
334 |
+
" <tr>\n",
|
335 |
+
" <th>...</th>\n",
|
336 |
+
" <td>...</td>\n",
|
337 |
+
" <td>...</td>\n",
|
338 |
+
" <td>...</td>\n",
|
339 |
+
" <td>...</td>\n",
|
340 |
+
" <td>...</td>\n",
|
341 |
+
" <td>...</td>\n",
|
342 |
+
" </tr>\n",
|
343 |
+
" <tr>\n",
|
344 |
+
" <th>3474</th>\n",
|
345 |
+
" <td>2010-07-06</td>\n",
|
346 |
+
" <td>20.00</td>\n",
|
347 |
+
" <td>20.0000</td>\n",
|
348 |
+
" <td>15.8300</td>\n",
|
349 |
+
" <td>16.11</td>\n",
|
350 |
+
" <td>6866900.0</td>\n",
|
351 |
+
" </tr>\n",
|
352 |
+
" <tr>\n",
|
353 |
+
" <th>3475</th>\n",
|
354 |
+
" <td>2010-07-02</td>\n",
|
355 |
+
" <td>23.00</td>\n",
|
356 |
+
" <td>23.1000</td>\n",
|
357 |
+
" <td>18.7100</td>\n",
|
358 |
+
" <td>19.20</td>\n",
|
359 |
+
" <td>5139800.0</td>\n",
|
360 |
+
" </tr>\n",
|
361 |
+
" <tr>\n",
|
362 |
+
" <th>3476</th>\n",
|
363 |
+
" <td>2010-07-01</td>\n",
|
364 |
+
" <td>25.00</td>\n",
|
365 |
+
" <td>25.9200</td>\n",
|
366 |
+
" <td>20.2700</td>\n",
|
367 |
+
" <td>21.96</td>\n",
|
368 |
+
" <td>8218800.0</td>\n",
|
369 |
+
" </tr>\n",
|
370 |
+
" <tr>\n",
|
371 |
+
" <th>3477</th>\n",
|
372 |
+
" <td>2010-06-30</td>\n",
|
373 |
+
" <td>25.79</td>\n",
|
374 |
+
" <td>30.4192</td>\n",
|
375 |
+
" <td>23.3000</td>\n",
|
376 |
+
" <td>23.83</td>\n",
|
377 |
+
" <td>17187100.0</td>\n",
|
378 |
+
" </tr>\n",
|
379 |
+
" <tr>\n",
|
380 |
+
" <th>3478</th>\n",
|
381 |
+
" <td>2010-06-29</td>\n",
|
382 |
+
" <td>19.00</td>\n",
|
383 |
+
" <td>25.0000</td>\n",
|
384 |
+
" <td>17.5400</td>\n",
|
385 |
+
" <td>23.89</td>\n",
|
386 |
+
" <td>18766300.0</td>\n",
|
387 |
+
" </tr>\n",
|
388 |
+
" </tbody>\n",
|
389 |
+
"</table>\n",
|
390 |
+
"<p>3479 rows × 6 columns</p>\n",
|
391 |
+
"</div>"
|
392 |
+
],
|
393 |
+
"text/plain": [
|
394 |
+
" date 1. open 2. high 3. low 4. close 5. volume\n",
|
395 |
+
"0 2024-04-24 162.84 167.9700 157.5100 162.13 181178020.0\n",
|
396 |
+
"1 2024-04-23 143.33 147.2600 141.1100 144.68 124545104.0\n",
|
397 |
+
"2 2024-04-22 140.56 144.4400 138.8025 142.05 107097564.0\n",
|
398 |
+
"3 2024-04-19 148.97 150.9400 146.2200 147.05 87074500.0\n",
|
399 |
+
"4 2024-04-18 151.25 152.2000 148.7000 149.93 96098830.0\n",
|
400 |
+
"... ... ... ... ... ... ...\n",
|
401 |
+
"3474 2010-07-06 20.00 20.0000 15.8300 16.11 6866900.0\n",
|
402 |
+
"3475 2010-07-02 23.00 23.1000 18.7100 19.20 5139800.0\n",
|
403 |
+
"3476 2010-07-01 25.00 25.9200 20.2700 21.96 8218800.0\n",
|
404 |
+
"3477 2010-06-30 25.79 30.4192 23.3000 23.83 17187100.0\n",
|
405 |
+
"3478 2010-06-29 19.00 25.0000 17.5400 23.89 18766300.0\n",
|
406 |
+
"\n",
|
407 |
+
"[3479 rows x 6 columns]"
|
408 |
+
]
|
409 |
+
},
|
410 |
+
"execution_count": 10,
|
411 |
+
"metadata": {},
|
412 |
+
"output_type": "execute_result"
|
413 |
+
}
|
414 |
+
],
|
415 |
+
"source": [
|
416 |
+
"tsla_df"
|
417 |
+
]
|
418 |
+
},
|
419 |
+
{
|
420 |
+
"cell_type": "code",
|
421 |
+
"execution_count": 11,
|
422 |
+
"metadata": {},
|
423 |
+
"outputs": [],
|
424 |
+
"source": [
|
425 |
+
"# Assuming 'tsla_df' is your DataFrame\n",
|
426 |
+
"tsla_df.columns = [clean_column_name(col) for col in tsla_df.columns]\n"
|
427 |
+
]
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"cell_type": "code",
|
431 |
+
"execution_count": 12,
|
432 |
+
"metadata": {},
|
433 |
+
"outputs": [
|
434 |
+
{
|
435 |
+
"name": "stdout",
|
436 |
+
"output_type": "stream",
|
437 |
+
"text": [
|
438 |
+
"Index(['date', 'open', 'high', 'low', 'close', 'volume'], dtype='object')\n"
|
439 |
+
]
|
440 |
+
}
|
441 |
+
],
|
442 |
+
"source": [
|
443 |
+
"print(tsla_df.columns)\n"
|
444 |
+
]
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"cell_type": "code",
|
448 |
+
"execution_count": 13,
|
449 |
+
"metadata": {},
|
450 |
+
"outputs": [],
|
451 |
+
"source": [
|
452 |
+
"# Define a feature group\n",
|
453 |
+
"tesla_fg = fs.get_or_create_feature_group(\n",
|
454 |
+
" name=\"tsla_stock\",\n",
|
455 |
+
" description=\"Tesla stock dataset from alpha vantage\",\n",
|
456 |
+
" version=1,\n",
|
457 |
+
" primary_key=[\"date\"],\n",
|
458 |
+
" online_enabled=True,\n",
|
459 |
+
")"
|
460 |
+
]
|
461 |
+
},
|
462 |
+
{
|
463 |
+
"cell_type": "code",
|
464 |
+
"execution_count": 14,
|
465 |
+
"metadata": {},
|
466 |
+
"outputs": [
|
467 |
+
{
|
468 |
+
"data": {
|
469 |
+
"application/vnd.jupyter.widget-view+json": {
|
470 |
+
"model_id": "91ef74ded4714a1492bdc24b176c4f1e",
|
471 |
+
"version_major": 2,
|
472 |
+
"version_minor": 0
|
473 |
+
},
|
474 |
+
"text/plain": [
|
475 |
+
"Uploading Dataframe: 0.00% | | Rows 0/3479 | Elapsed Time: 00:00 | Remaining Time: ?"
|
476 |
+
]
|
477 |
+
},
|
478 |
+
"metadata": {},
|
479 |
+
"output_type": "display_data"
|
480 |
+
},
|
481 |
+
{
|
482 |
+
"name": "stdout",
|
483 |
+
"output_type": "stream",
|
484 |
+
"text": [
|
485 |
+
"Launching job: tsla_stock_1_offline_fg_materialization\n",
|
486 |
+
"Job started successfully, you can follow the progress at \n",
|
487 |
+
"https://c.app.hopsworks.ai/p/549016/jobs/named/tsla_stock_1_offline_fg_materialization/executions\n"
|
488 |
+
]
|
489 |
+
},
|
490 |
+
{
|
491 |
+
"data": {
|
492 |
+
"text/plain": [
|
493 |
+
"(<hsfs.core.job.Job at 0x177b01510>, None)"
|
494 |
+
]
|
495 |
+
},
|
496 |
+
"execution_count": 14,
|
497 |
+
"metadata": {},
|
498 |
+
"output_type": "execute_result"
|
499 |
+
}
|
500 |
+
],
|
501 |
+
"source": [
|
502 |
+
"tesla_fg.insert(tsla_df, write_options={\"wait_for_job\" : False})"
|
503 |
+
]
|
504 |
+
},
|
505 |
+
{
|
506 |
+
"cell_type": "code",
|
507 |
+
"execution_count": null,
|
508 |
+
"metadata": {},
|
509 |
+
"outputs": [],
|
510 |
+
"source": []
|
511 |
+
}
|
512 |
+
],
|
513 |
+
"metadata": {
|
514 |
+
"kernelspec": {
|
515 |
+
"display_name": "base",
|
516 |
+
"language": "python",
|
517 |
+
"name": "python3"
|
518 |
+
},
|
519 |
+
"language_info": {
|
520 |
+
"codemirror_mode": {
|
521 |
+
"name": "ipython",
|
522 |
+
"version": 3
|
523 |
+
},
|
524 |
+
"file_extension": ".py",
|
525 |
+
"mimetype": "text/x-python",
|
526 |
+
"name": "python",
|
527 |
+
"nbconvert_exporter": "python",
|
528 |
+
"pygments_lexer": "ipython3",
|
529 |
+
"version": "3.11.4"
|
530 |
+
},
|
531 |
+
"orig_nbformat": 4
|
532 |
+
},
|
533 |
+
"nbformat": 4,
|
534 |
+
"nbformat_minor": 2
|
535 |
+
}
|
feature_preprocessing.ipynb
CHANGED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 44,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"from calendar import monthrange\n",
|
10 |
+
"from feature_engineering import *\n",
|
11 |
+
"import glob\n",
|
12 |
+
"import pandas as pd\n",
|
13 |
+
"from dotenv import load_dotenv\n",
|
14 |
+
"import os\n",
|
15 |
+
"\n",
|
16 |
+
"load_dotenv()\n",
|
17 |
+
"\n",
|
18 |
+
"# Set the API endpoint and your API key\n",
|
19 |
+
"endpoint = \"https://api.marketaux.com/v1/news/all?symbols=TSLA&filter_entities=true&published_after=2021&language=en&api_token=iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\"\n",
|
20 |
+
"api_key = os.environ.get('news_api')\n",
|
21 |
+
"\n",
|
22 |
+
"# Set the ticker symbol\n",
|
23 |
+
"ticker = \"TSLA\" #TSLA"
|
24 |
+
]
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"cell_type": "code",
|
28 |
+
"execution_count": 34,
|
29 |
+
"metadata": {},
|
30 |
+
"outputs": [],
|
31 |
+
"source": [
|
32 |
+
"def getNews_historical(api_key,endpoint,ticker,year,month,num=1000):\n",
|
33 |
+
" \n",
|
34 |
+
" for start,end in zip([1,15],[16,monthrange(year, month)[1]]):\n",
|
35 |
+
" \n",
|
36 |
+
" from_date = '{}-{:02d}-{:02d}'.format(year,month,start)\n",
|
37 |
+
" to_date = '{}-{:02d}-{:02d}'.format(year,month,end)\n",
|
38 |
+
" \n",
|
39 |
+
" print('Grabbing News data between {}-{}'.format(from_date,to_date)) \n",
|
40 |
+
" news = getNews(api_key,endpoint,ticker,from_date,to_date)\n",
|
41 |
+
" \n",
|
42 |
+
" print('Number of articles: ',len(news.index))\n",
|
43 |
+
" news.head(n=num)\n",
|
44 |
+
"\n",
|
45 |
+
" # Store the dataframe as a CSV file\n",
|
46 |
+
" news.to_csv(\"/Users/manos/Documents/BDS/MLops_mod/TSLA_news_{}_to_{}.csv\".format(from_date,to_date))"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"cell_type": "code",
|
51 |
+
"execution_count": 36,
|
52 |
+
"metadata": {},
|
53 |
+
"outputs": [
|
54 |
+
{
|
55 |
+
"name": "stdout",
|
56 |
+
"output_type": "stream",
|
57 |
+
"text": [
|
58 |
+
"{'meta': {'found': 58203, 'returned': 3, 'limit': 3, 'page': 1}, 'data': [{'uuid': 'a2f5f0e0-937a-4333-9aa7-da32fb0ede1f', 'title': \"What's next for Big Tech? See what SA analysts have to say\", 'description': 'Technology stocks have dropped over the past couple of weeks. See what SA analysts have to say about the overall state of tech and the economy.', 'keywords': '', 'snippet': 'Technology stocks have dropped over the past couple of weeks, and it was further seen with the selloff in Meta Platforms (META) and weak GDP data, as the two ac...', 'url': 'https://seekingalpha.com/news/4094186-tech-stocks-dive-see-what-sa-analysts-have-to-say', 'image_url': 'https://static.seekingalpha.com/cdn/s3/uploads/getty_images/184997191/image_184997191.jpg?io=getty-c-w750', 'language': 'en', 'published_at': '2024-04-26T12:20:54.000000Z', 'source': 'seekingalpha.com', 'relevance_score': None, 'entities': [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'exchange': None, 'exchange_long': None, 'country': 'us', 'type': 'equity', 'industry': 'Consumer Cyclical', 'match_score': 11.309888, 'sentiment_score': 0, 'highlights': [{'highlight': '<em>Tesla</em> (TSLA) -3.5% .\\n\\nTech ETFs', 'sentiment': 0, 'highlighted_in': 'main_text'}]}], 'similar': []}, {'uuid': '650adf2f-d62f-478d-9322-05d3e7d7532d', 'title': 'Stellantis And Tesla: Combine These Stocks For The Ultimate Automotive Portfolio (STLA)', 'description': 'Tesla and Stellantis are two automakers that complement each other. Find out why I see both STLA and TSLA stocks as currently undervalued.', 'keywords': '', 'snippet': 'Tramino/iStock Unreleased via Getty Images\\n\\nStellantis N.V. (NYSE:STLA) and Tesla, Inc. 
(TSLA) are two very distinct automakers that, in my view, perfectly comp...', 'url': 'https://seekingalpha.com/article/4686610-stellantis-tesla-combine-these-stocks-for-ultimate-automotive-portfolio', 'image_url': 'https://static.seekingalpha.com/cdn/s3/uploads/getty_images/1305717707/image_1305717707.jpg?io=getty-c-w1536', 'language': 'en', 'published_at': '2024-04-26T10:58:06.000000Z', 'source': 'seekingalpha.com', 'relevance_score': None, 'entities': [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'exchange': None, 'exchange_long': None, 'country': 'us', 'type': 'equity', 'industry': 'Consumer Cyclical', 'match_score': 22.866589, 'sentiment_score': 0.173982, 'highlights': [{'highlight': '(NYSE:STLA) and <em>Tesla</em>, <em>Inc</em>. (TSLA) are two very distinct automakers that, in my view, perfectly complement each other. By entering a 50/50 balanced position in the two companies, you can create an “artificial” automaker in your portfolio that is bound to dominate the industry and provide superior returns for shareholders.', 'sentiment': 0.8519, 'highlighted_in': 'main_text'}, {'highlight': 'The brands of “TESSA” include:\\n\\n<em>Tesla</em>, the leading global EV brand and #1 most valuable car brand in the world. Because of Tesla’s aggressive price policy lately, I believe it makes almost no economic sense to buy an EV that is not a <em>Tesla</em>, for the majority of consumers. More on this shortly.', 'sentiment': 0.2089, 'highlighted_in': 'main_text'}, {'highlight': 'The two overall car brand portfolios encompass all market segments\\n\\nGoing beyond EVs, I see “TESSA’s” car portfolio to cover all segments, again because of the complementarity of <em>Tesla</em> and Stellantis. 
The below chart outlines how all car segments are covered by either <em>Tesla</em> or Stellantis.', 'sentiment': 0, 'highlighted_in': 'main_text'}, {'highlight': 'To be fair, both Stellantis and <em>Tesla</em> margins declined in 2023, and in the case of <em>Tesla</em>, the company just reported that margins are now down to 5.5% after Q1 price cuts.\\n\\nHowever, I believe that Tesla’s margins at the moment do not tell the full story.', 'sentiment': 0.0258, 'highlighted_in': 'main_text'}, {'highlight': 'It is precisely because it enjoyed a 25%+ operating margin back in 2021 that <em>Tesla</em> was able to grow its company with aggressive pricing in the past 2 years.\\n\\nToday, for the majority of use cases, I believe buying an EV that is not a <em>Tesla</em> does not make rational sense.', 'sentiment': 0.4019, 'highlighted_in': 'main_text'}, {'highlight': 'These are cars that have starting prices that are significantly higher than <em>Tesla</em>, but with worse reviews, worse technology and limited access to Tesla’s SuperCharger system. 
Even EV-native car brands, such as Rivian and Polestar, have difficulty in competing with <em>Tesla</em>.', 'sentiment': -0.9294, 'highlighted_in': 'main_text'}, {'highlight': 'A Rivian R2 starts at $45,000, which is almost $7,000 more than the base <em>Tesla</em> Model 3.\\n\\nI believe that <em>Tesla</em> is using its margins to grow the EV category, converting ICE consumers, and simultaneously gain the monster share of that growing market.', 'sentiment': 0.743, 'highlighted_in': 'main_text'}, {'highlight': \"Key Financial Metrics for <em>Tesla</em>, Q1 24 (Tesla's Q1 Shareholders Presentation)\\n\\nKey Financial Metrics for Stellantis, Q1 24 (Stellantis' Q1 Shareholder Presentation)\\n\\nThis financial data tells the same story: <em>Tesla</em> and Stellantis complement each other.\", 'sentiment': 0, 'highlighted_in': 'main_text'}, {'highlight': 'VOO since 2021 (Seeking Alpha)\\n\\nWhat matters for my thesis is that <em>Tesla</em> and Stellantis are complementary in how they reward shareholders and how the market prices their stocks. 
<em>Tesla</em> is a tech company, looking at the long term, and shareholders need to be patient to see returns.', 'sentiment': 0.5859, 'highlighted_in': 'main_text'}, {'highlight': 'In that case, <em>Tesla</em> might generate returns significantly higher than Stellantis, to the point that it would have seemed silly to “dilute” a <em>Tesla</em> investment with another stock.', 'sentiment': 0.0258, 'highlighted_in': 'main_text'}, {'highlight': 'Stellantis And <em>Tesla</em>: Combine These Stocks For The Ultimate Automotive Portfolio (STLA)', 'sentiment': 0, 'highlighted_in': 'title'}]}], 'similar': []}, {'uuid': '47a58bd4-3a8d-40fe-8a89-934d0d695ea4', 'title': 'Tesla is being investigated by the NHTSA for Autopilot software fix (NASDAQ:TSLA)', 'description': \"The National Highway Traffic Safety Administration is investigating whether Tesla's recall of 2 million vehicles for Autopilot safeguards is sufficient.\", 'keywords': '', 'snippet': \"The National Highway Traffic Safety Administration confirmed on Friday that the safety regulator has opened an investigation into whether Tesla's (NASDAQ:TSLA) ...\", 'url': 'https://seekingalpha.com/news/4094754-tesla-is-being-investigated-by-the-nhtsa-for-autopilot-software-fix', 'image_url': 'https://static.seekingalpha.com/cdn/s3/uploads/getty_images/1415090444/image_1415090444.jpg?io=getty-c-w750', 'language': 'en', 'published_at': '2024-04-26T10:50:20.000000Z', 'source': 'seekingalpha.com', 'relevance_score': None, 'entities': [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'exchange': None, 'exchange_long': None, 'country': 'us', 'type': 'equity', 'industry': 'Consumer Cyclical', 'match_score': 51.845444, 'sentiment_score': 0.42985, 'highlights': [{'highlight': \"The National Highway Traffic Safety Administration confirmed on Friday that the safety regulator has opened an investigation into whether Tesla's (<em>NASDAQ:TSLA</em>) recall of more than 2 million vehicles announced in December to install new Autopilot safeguards is 
adequate.\", 'sentiment': 0.836, 'highlighted_in': 'main_text'}, {'highlight': \"While <em>Tesla</em> has released software updates to address potential issues, NHTSA cited Tesla's statement that a portion of the remedy both requires the owner to opt in and allows a driver to readily reverse it.\", 'sentiment': 0, 'highlighted_in': 'main_text'}, {'highlight': 'In December, <em>Tesla</em> (TSLA) said its largest-ever recall was to better ensure drivers pay attention when using its advanced driver assistance system.\\n\\nShares of <em>Tesla</em> (TSLA) rose 1.17% in premarket trading on Friday to $172.17. The EV stock is down 31.51% on a year-to-date basis. Short interest stands at 3.84% of the total float.', 'sentiment': 0.8834, 'highlighted_in': 'main_text'}, {'highlight': '<em>Tesla</em> is being investigated by the NHTSA for Autopilot software fix (<em>NASDAQ:TSLA</em>)', 'sentiment': 0, 'highlighted_in': 'title'}]}], 'similar': [{'uuid': 'b269d18a-6ea0-4554-a20e-047c623513f9', 'title': 'US probes Tesla recall of 2 million vehicles over Autopilot, citing concerns By Reuters', 'description': 'US probes Tesla recall of 2 million vehicles over Autopilot, citing concerns', 'keywords': '', 'snippet': \"WASHINGTON (Reuters) - U.S. 
auto safety regulators said Friday they have opened an investigation into whether Tesla (NASDAQ: )'s recall of more than 2 million v...\", 'url': 'https://www.investing.com/news/stock-market-news/us-probes-tesla-recall-of-2-million-vehicles-over-autopilot-citing-concerns-3400236', 'image_url': 'https://i-invdn-com.investing.com/news/moved_LYNXMPEJ580NE_L.jpg', 'language': 'en', 'published_at': '2024-04-26T09:51:10.000000Z', 'source': 'investing.com', 'relevance_score': None, 'entities': [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'exchange': None, 'exchange_long': None, 'country': 'us', 'type': 'equity', 'industry': 'Consumer Cyclical', 'match_score': 25.2132, 'sentiment_score': 0.432933, 'highlights': [{'highlight': \"WASHINGTON (Reuters) - U.S. auto safety regulators said Friday they have opened an investigation into whether <em>Tesla</em> (NASDAQ: )'s recall of more than 2 million vehicles announced in December to install new Autopilot safeguards is adequate.\", 'sentiment': 0.7269, 'highlighted_in': 'main_text'}, {'highlight': 'The agency said <em>Tesla</em> has issued software updates to address issues that appear related to its concerns but has not made them \"a part of the recall or otherwise determined to remedy a defect that poses an unreasonable safety risk.\"', 'sentiment': 0.5719, 'highlighted_in': 'main_text'}, {'highlight': 'US probes <em>Tesla</em> recall of 2 million vehicles over Autopilot, citing concerns By Reuters', 'sentiment': 0, 'highlighted_in': 'title'}]}]}]}]}\n"
|
59 |
+
]
|
60 |
+
}
|
61 |
+
],
|
62 |
+
"source": [
|
63 |
+
"response = requests.get(endpoint)\n",
|
64 |
+
"data = response.json()\n",
|
65 |
+
"print(data) # See what the data looks like\n"
|
66 |
+
]
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"cell_type": "code",
|
70 |
+
"execution_count": 39,
|
71 |
+
"metadata": {},
|
72 |
+
"outputs": [
|
73 |
+
{
|
74 |
+
"name": "stdout",
|
75 |
+
"output_type": "stream",
|
76 |
+
"text": [
|
77 |
+
"Grabbing News data between 2022-01-01-2022-01-16\n"
|
78 |
+
]
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"ename": "ValueError",
|
82 |
+
"evalue": "All arrays must be of the same length",
|
83 |
+
"output_type": "error",
|
84 |
+
"traceback": [
|
85 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
86 |
+
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
87 |
+
"\u001b[1;32m/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb Cell 4\u001b[0m line \u001b[0;36m4\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb#X14sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39mfor\u001b[39;00m year \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39m2022\u001b[39m,\u001b[39m2023\u001b[39m):\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb#X14sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39mfor\u001b[39;00m month \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39m1\u001b[39m,\u001b[39m13\u001b[39m):\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb#X14sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m getNews_historical(api_key,endpoint,ticker,year,month)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb#X14sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m \u001b[39mif\u001b[39;00m year \u001b[39m==\u001b[39m \u001b[39m2023\u001b[39m \u001b[39mand\u001b[39;00m month \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb#X14sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m \u001b[39mbreak\u001b[39;00m\n",
|
88 |
+
"\u001b[1;32m/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb Cell 4\u001b[0m line \u001b[0;36m9\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb#X14sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m to_date \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m-\u001b[39m\u001b[39m{:02d}\u001b[39;00m\u001b[39m-\u001b[39m\u001b[39m{:02d}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(year,month,end)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb#X14sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mGrabbing News data between \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m-\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(from_date,to_date)) \n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb#X14sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m news \u001b[39m=\u001b[39m getNews(api_key,endpoint,ticker,from_date,to_date)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb#X14sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mNumber of articles: \u001b[39m\u001b[39m'\u001b[39m,\u001b[39mlen\u001b[39m(news\u001b[39m.\u001b[39mindex))\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/MLops_mod/feature_preprocessing.ipynb#X14sZmlsZQ%3D%3D?line=11'>12</a>\u001b[0m news\u001b[39m.\u001b[39mhead(n\u001b[39m=\u001b[39mnum)\n",
|
89 |
+
"File \u001b[0;32m~/Documents/BDS/MLops_mod/feature_engineering.py:27\u001b[0m, in \u001b[0;36mgetNews\u001b[0;34m(api_key, endpoint, ticker, from_date, to_date, num)\u001b[0m\n\u001b[1;32m 21\u001b[0m response \u001b[39m=\u001b[39m requests\u001b[39m.\u001b[39mget(endpoint, params\u001b[39m=\u001b[39mparams)\n\u001b[1;32m 23\u001b[0m \u001b[39m# Print the response from the API\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \u001b[39m#print(response.json())\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \n\u001b[1;32m 26\u001b[0m \u001b[39m#Return a Pandas dataframe from the response\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m \u001b[39mreturn\u001b[39;00m pd\u001b[39m.\u001b[39mDataFrame(response\u001b[39m.\u001b[39mjson())\n",
|
90 |
+
"File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:662\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m 656\u001b[0m mgr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_mgr(\n\u001b[1;32m 657\u001b[0m data, axes\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m: index, \u001b[39m\"\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m\"\u001b[39m: columns}, dtype\u001b[39m=\u001b[39mdtype, copy\u001b[39m=\u001b[39mcopy\n\u001b[1;32m 658\u001b[0m )\n\u001b[1;32m 660\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, \u001b[39mdict\u001b[39m):\n\u001b[1;32m 661\u001b[0m \u001b[39m# GH#38939 de facto copy defaults to False only in non-dict cases\u001b[39;00m\n\u001b[0;32m--> 662\u001b[0m mgr \u001b[39m=\u001b[39m dict_to_mgr(data, index, columns, dtype\u001b[39m=\u001b[39mdtype, copy\u001b[39m=\u001b[39mcopy, typ\u001b[39m=\u001b[39mmanager)\n\u001b[1;32m 663\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(data, ma\u001b[39m.\u001b[39mMaskedArray):\n\u001b[1;32m 664\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnumpy\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mma\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mmrecords\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mmrecords\u001b[39;00m\n",
|
91 |
+
"File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:493\u001b[0m, in \u001b[0;36mdict_to_mgr\u001b[0;34m(data, index, columns, dtype, typ, copy)\u001b[0m\n\u001b[1;32m 489\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 490\u001b[0m \u001b[39m# dtype check to exclude e.g. range objects, scalars\u001b[39;00m\n\u001b[1;32m 491\u001b[0m arrays \u001b[39m=\u001b[39m [x\u001b[39m.\u001b[39mcopy() \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(x, \u001b[39m\"\u001b[39m\u001b[39mdtype\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39melse\u001b[39;00m x \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m arrays]\n\u001b[0;32m--> 493\u001b[0m \u001b[39mreturn\u001b[39;00m arrays_to_mgr(arrays, columns, index, dtype\u001b[39m=\u001b[39mdtype, typ\u001b[39m=\u001b[39mtyp, consolidate\u001b[39m=\u001b[39mcopy)\n",
|
92 |
+
"File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:118\u001b[0m, in \u001b[0;36marrays_to_mgr\u001b[0;34m(arrays, columns, index, dtype, verify_integrity, typ, consolidate)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[39mif\u001b[39;00m verify_integrity:\n\u001b[1;32m 116\u001b[0m \u001b[39m# figure out the index, if necessary\u001b[39;00m\n\u001b[1;32m 117\u001b[0m \u001b[39mif\u001b[39;00m index \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 118\u001b[0m index \u001b[39m=\u001b[39m _extract_index(arrays)\n\u001b[1;32m 119\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 120\u001b[0m index \u001b[39m=\u001b[39m ensure_index(index)\n",
|
93 |
+
"File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/core/internals/construction.py:666\u001b[0m, in \u001b[0;36m_extract_index\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m 664\u001b[0m lengths \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\u001b[39mset\u001b[39m(raw_lengths))\n\u001b[1;32m 665\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(lengths) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[0;32m--> 666\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mAll arrays must be of the same length\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 668\u001b[0m \u001b[39mif\u001b[39;00m have_dicts:\n\u001b[1;32m 669\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 670\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mMixing dicts with non-Series may lead to ambiguous ordering.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 671\u001b[0m )\n",
|
94 |
+
"\u001b[0;31mValueError\u001b[0m: All arrays must be of the same length"
|
95 |
+
]
|
96 |
+
}
|
97 |
+
],
|
98 |
+
"source": [
|
99 |
+
"# Grab old data\n",
|
100 |
+
"for year in range(2022,2023):\n",
|
101 |
+
" for month in range(1,13):\n",
|
102 |
+
" getNews_historical(api_key,endpoint,ticker,year,month)\n",
|
103 |
+
" if year == 2023 and month == 1:\n",
|
104 |
+
" break"
|
105 |
+
]
|
106 |
+
}
|
107 |
+
],
|
108 |
+
"metadata": {
|
109 |
+
"language_info": {
|
110 |
+
"name": "python"
|
111 |
+
},
|
112 |
+
"orig_nbformat": 4
|
113 |
+
},
|
114 |
+
"nbformat": 4,
|
115 |
+
"nbformat_minor": 2
|
116 |
+
}
|
news_experimenting.ipynb
CHANGED
@@ -14,10 +14,22 @@
|
|
14 |
},
|
15 |
{
|
16 |
"cell_type": "code",
|
17 |
-
"execution_count":
|
18 |
"metadata": {},
|
19 |
"outputs": [],
|
20 |
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
"def fetch_tesla_news(api_key, start_date, end_date):\n",
|
22 |
" url = \"https://api.marketaux.com/v1/news/all?symbols=TSLA&filter_entities=true&language=en&api_token=iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\" # Modify this based on the exact endpoint you need\n",
|
23 |
" headers = {\n",
|
@@ -43,7 +55,7 @@
|
|
43 |
},
|
44 |
{
|
45 |
"cell_type": "code",
|
46 |
-
"execution_count":
|
47 |
"metadata": {},
|
48 |
"outputs": [
|
49 |
{
|
@@ -51,44 +63,39 @@
|
|
51 |
"output_type": "stream",
|
52 |
"text": [
|
53 |
" uuid \\\n",
|
54 |
-
"0
|
55 |
-
"1
|
56 |
-
"2
|
57 |
"\n",
|
58 |
" title \\\n",
|
59 |
-
"0
|
60 |
-
"1
|
61 |
-
"2
|
62 |
-
"\n",
|
63 |
-
" description \\\n",
|
64 |
-
"0 The wait for Tesla results is on as investors ... \n",
|
65 |
-
"1 Here are the most important news items that in... \n",
|
66 |
-
"2 UnitedHealth confirms personal data compromise... \n",
|
67 |
"\n",
|
68 |
-
"
|
69 |
-
"0
|
70 |
-
"1
|
71 |
-
"2
|
72 |
"\n",
|
73 |
" snippet \\\n",
|
74 |
-
"0
|
75 |
-
"1
|
76 |
-
"2
|
77 |
"\n",
|
78 |
" url \\\n",
|
79 |
-
"0 https://
|
80 |
-
"1 https://www.
|
81 |
-
"2 https://
|
82 |
"\n",
|
83 |
" image_url language \\\n",
|
84 |
-
"0 https://
|
85 |
-
"1 https://
|
86 |
-
"2 https://
|
87 |
"\n",
|
88 |
-
" published_at
|
89 |
-
"0 2024-04-
|
90 |
-
"1 2024-04-
|
91 |
-
"2 2024-04-
|
92 |
"\n",
|
93 |
" entities \\\n",
|
94 |
"0 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
|
@@ -96,24 +103,172 @@
|
|
96 |
"2 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
|
97 |
"\n",
|
98 |
" similar \n",
|
99 |
-
"0
|
100 |
"1 [] \n",
|
101 |
-
"2
|
102 |
]
|
103 |
}
|
104 |
],
|
105 |
"source": [
|
106 |
-
"api_key = \
|
107 |
"tesla_news_df = fetch_tesla_news(api_key, \"2017-01-01\", datetime.now().strftime('%Y-%m-%d'))\n",
|
108 |
"print(tesla_news_df.head())"
|
109 |
]
|
110 |
},
|
111 |
{
|
112 |
"cell_type": "code",
|
113 |
-
"execution_count":
|
114 |
"metadata": {},
|
115 |
-
"outputs": [
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
},
|
118 |
{
|
119 |
"cell_type": "code",
|
|
|
14 |
},
|
15 |
{
|
16 |
"cell_type": "code",
|
17 |
+
"execution_count": 41,
|
18 |
"metadata": {},
|
19 |
"outputs": [],
|
20 |
"source": [
|
21 |
+
"from dotenv import load_dotenv\n",
|
22 |
+
"import os"
|
23 |
+
]
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"cell_type": "code",
|
27 |
+
"execution_count": 43,
|
28 |
+
"metadata": {},
|
29 |
+
"outputs": [],
|
30 |
+
"source": [
|
31 |
+
"load_dotenv()\n",
|
32 |
+
"\n",
|
33 |
"def fetch_tesla_news(api_key, start_date, end_date):\n",
|
34 |
" url = \"https://api.marketaux.com/v1/news/all?symbols=TSLA&filter_entities=true&language=en&api_token=iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\" # Modify this based on the exact endpoint you need\n",
|
35 |
" headers = {\n",
|
|
|
55 |
},
|
56 |
{
|
57 |
"cell_type": "code",
|
58 |
+
"execution_count": 44,
|
59 |
"metadata": {},
|
60 |
"outputs": [
|
61 |
{
|
|
|
63 |
"output_type": "stream",
|
64 |
"text": [
|
65 |
" uuid \\\n",
|
66 |
+
"0 99ce54fb-eb5b-4c9d-be66-6a56aa9f6de3 \n",
|
67 |
+
"1 9286f959-f5ea-4b95-88e8-7ba99a472ca5 \n",
|
68 |
+
"2 92e41393-f923-4c98-ba38-5bab89fedabd \n",
|
69 |
"\n",
|
70 |
" title \\\n",
|
71 |
+
"0 S&P 500 gains as investors digest positive ear... \n",
|
72 |
+
"1 3 Defensive Stocks to Protect Your Portfolio i... \n",
|
73 |
+
"2 Tesla to lay off more than 3,000 employees in ... \n",
|
|
|
|
|
|
|
|
|
|
|
74 |
"\n",
|
75 |
+
" description keywords \\\n",
|
76 |
+
"0 Benchmark S&P 500 rose on\\nTuesday following p... Markets \n",
|
77 |
+
"1 Stocks Analysis by The Tokenist (Timothy Fries... \n",
|
78 |
+
"2 Tesla is set to cut 3,332\\njobs in California,... Markets \n",
|
79 |
"\n",
|
80 |
" snippet \\\n",
|
81 |
+
"0 * Tesla set to kick off Magnificent Seven earn... \n",
|
82 |
+
"1 After the Iran-Israel situation fizzled out fo... \n",
|
83 |
+
"2 Tesla, Inc. designs, builds, and sells electri... \n",
|
84 |
"\n",
|
85 |
" url \\\n",
|
86 |
+
"0 https://www.marketscreener.com/quote/stock/TES... \n",
|
87 |
+
"1 https://www.investing.com/analysis/3-defensive... \n",
|
88 |
+
"2 https://www.marketscreener.com/quote/stock/TES... \n",
|
89 |
"\n",
|
90 |
" image_url language \\\n",
|
91 |
+
"0 https://img.zonebourse.com/reuters/2016-04/201... en \n",
|
92 |
+
"1 https://i-invdn-com.investing.com/redesign/ima... en \n",
|
93 |
+
"2 https://www.marketscreener.com/images/reuters/... en \n",
|
94 |
"\n",
|
95 |
+
" published_at source relevance_score \\\n",
|
96 |
+
"0 2024-04-23T19:10:43.000000Z marketscreener.com None \n",
|
97 |
+
"1 2024-04-23T19:06:00.000000Z investing.com None \n",
|
98 |
+
"2 2024-04-23T18:50:54.000000Z marketscreener.com None \n",
|
99 |
"\n",
|
100 |
" entities \\\n",
|
101 |
"0 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
|
|
|
103 |
"2 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
|
104 |
"\n",
|
105 |
" similar \n",
|
106 |
+
"0 [] \n",
|
107 |
"1 [] \n",
|
108 |
+
"2 [{'uuid': 'df8f1cf3-89ca-4430-975a-131b9c31245... \n"
|
109 |
]
|
110 |
}
|
111 |
],
|
112 |
"source": [
|
113 |
+
"api_key = os.environ.get('news_api')\n",
|
114 |
"tesla_news_df = fetch_tesla_news(api_key, \"2017-01-01\", datetime.now().strftime('%Y-%m-%d'))\n",
|
115 |
"print(tesla_news_df.head())"
|
116 |
]
|
117 |
},
|
118 |
{
|
119 |
"cell_type": "code",
|
120 |
+
"execution_count": 40,
|
121 |
"metadata": {},
|
122 |
+
"outputs": [
|
123 |
+
{
|
124 |
+
"data": {
|
125 |
+
"text/html": [
|
126 |
+
"<div>\n",
|
127 |
+
"<style scoped>\n",
|
128 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
129 |
+
" vertical-align: middle;\n",
|
130 |
+
" }\n",
|
131 |
+
"\n",
|
132 |
+
" .dataframe tbody tr th {\n",
|
133 |
+
" vertical-align: top;\n",
|
134 |
+
" }\n",
|
135 |
+
"\n",
|
136 |
+
" .dataframe thead th {\n",
|
137 |
+
" text-align: right;\n",
|
138 |
+
" }\n",
|
139 |
+
"</style>\n",
|
140 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
141 |
+
" <thead>\n",
|
142 |
+
" <tr style=\"text-align: right;\">\n",
|
143 |
+
" <th></th>\n",
|
144 |
+
" <th>uuid</th>\n",
|
145 |
+
" <th>title</th>\n",
|
146 |
+
" <th>description</th>\n",
|
147 |
+
" <th>keywords</th>\n",
|
148 |
+
" <th>snippet</th>\n",
|
149 |
+
" <th>url</th>\n",
|
150 |
+
" <th>image_url</th>\n",
|
151 |
+
" <th>language</th>\n",
|
152 |
+
" <th>published_at</th>\n",
|
153 |
+
" <th>source</th>\n",
|
154 |
+
" <th>relevance_score</th>\n",
|
155 |
+
" <th>entities</th>\n",
|
156 |
+
" <th>similar</th>\n",
|
157 |
+
" </tr>\n",
|
158 |
+
" </thead>\n",
|
159 |
+
" <tbody>\n",
|
160 |
+
" <tr>\n",
|
161 |
+
" <th>0</th>\n",
|
162 |
+
" <td>daf76e3e-caea-4c92-a461-6b3132655788</td>\n",
|
163 |
+
" <td>Stock market today: US futures climb as earnin...</td>\n",
|
164 |
+
" <td>The wait for Tesla results is on as investors ...</td>\n",
|
165 |
+
" <td></td>\n",
|
166 |
+
" <td>US stocks climbed on Tuesday, on track for fur...</td>\n",
|
167 |
+
" <td>https://finance.yahoo.com/news/stock-market-to...</td>\n",
|
168 |
+
" <td>https://s.yimg.com/ny/api/res/1.2/mqjC0VUO61dY...</td>\n",
|
169 |
+
" <td>en</td>\n",
|
170 |
+
" <td>2024-04-23T11:22:53.000000Z</td>\n",
|
171 |
+
" <td>finance.yahoo.com</td>\n",
|
172 |
+
" <td>None</td>\n",
|
173 |
+
" <td>[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...</td>\n",
|
174 |
+
" <td>[{'uuid': '01cd65de-530d-407a-96c9-5b3359e98a0...</td>\n",
|
175 |
+
" </tr>\n",
|
176 |
+
" <tr>\n",
|
177 |
+
" <th>1</th>\n",
|
178 |
+
" <td>8dab10ca-5b23-465a-aa86-360bc987a774</td>\n",
|
179 |
+
" <td>5 things to know before the stock market opens...</td>\n",
|
180 |
+
" <td>Here are the most important news items that in...</td>\n",
|
181 |
+
" <td>Investment strategy, Economy, Markets, Busines...</td>\n",
|
182 |
+
" <td>In this article CPRI Follow your favorite stoc...</td>\n",
|
183 |
+
" <td>https://www.cnbc.com/2024/04/23/5-things-to-kn...</td>\n",
|
184 |
+
" <td>https://image.cnbcfm.com/api/v1/image/10692170...</td>\n",
|
185 |
+
" <td>en</td>\n",
|
186 |
+
" <td>2024-04-23T11:16:00.000000Z</td>\n",
|
187 |
+
" <td>cnbc.com</td>\n",
|
188 |
+
" <td>None</td>\n",
|
189 |
+
" <td>[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...</td>\n",
|
190 |
+
" <td>[]</td>\n",
|
191 |
+
" </tr>\n",
|
192 |
+
" <tr>\n",
|
193 |
+
" <th>2</th>\n",
|
194 |
+
" <td>b8c381b9-4187-433e-ad15-cecc9d227b13</td>\n",
|
195 |
+
" <td>Wall Street Breakfast Podcast: UNH: Personal D...</td>\n",
|
196 |
+
" <td>UnitedHealth confirms personal data compromise...</td>\n",
|
197 |
+
" <td></td>\n",
|
198 |
+
" <td>JHVEPhoto/iStock Editorial via Getty Images\\n\\...</td>\n",
|
199 |
+
" <td>https://seekingalpha.com/article/4685243-wall-...</td>\n",
|
200 |
+
" <td>https://static.seekingalpha.com/cdn/s3/uploads...</td>\n",
|
201 |
+
" <td>en</td>\n",
|
202 |
+
" <td>2024-04-23T11:00:00.000000Z</td>\n",
|
203 |
+
" <td>seekingalpha.com</td>\n",
|
204 |
+
" <td>None</td>\n",
|
205 |
+
" <td>[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...</td>\n",
|
206 |
+
" <td>[]</td>\n",
|
207 |
+
" </tr>\n",
|
208 |
+
" </tbody>\n",
|
209 |
+
"</table>\n",
|
210 |
+
"</div>"
|
211 |
+
],
|
212 |
+
"text/plain": [
|
213 |
+
" uuid \\\n",
|
214 |
+
"0 daf76e3e-caea-4c92-a461-6b3132655788 \n",
|
215 |
+
"1 8dab10ca-5b23-465a-aa86-360bc987a774 \n",
|
216 |
+
"2 b8c381b9-4187-433e-ad15-cecc9d227b13 \n",
|
217 |
+
"\n",
|
218 |
+
" title \\\n",
|
219 |
+
"0 Stock market today: US futures climb as earnin... \n",
|
220 |
+
"1 5 things to know before the stock market opens... \n",
|
221 |
+
"2 Wall Street Breakfast Podcast: UNH: Personal D... \n",
|
222 |
+
"\n",
|
223 |
+
" description \\\n",
|
224 |
+
"0 The wait for Tesla results is on as investors ... \n",
|
225 |
+
"1 Here are the most important news items that in... \n",
|
226 |
+
"2 UnitedHealth confirms personal data compromise... \n",
|
227 |
+
"\n",
|
228 |
+
" keywords \\\n",
|
229 |
+
"0 \n",
|
230 |
+
"1 Investment strategy, Economy, Markets, Busines... \n",
|
231 |
+
"2 \n",
|
232 |
+
"\n",
|
233 |
+
" snippet \\\n",
|
234 |
+
"0 US stocks climbed on Tuesday, on track for fur... \n",
|
235 |
+
"1 In this article CPRI Follow your favorite stoc... \n",
|
236 |
+
"2 JHVEPhoto/iStock Editorial via Getty Images\\n\\... \n",
|
237 |
+
"\n",
|
238 |
+
" url \\\n",
|
239 |
+
"0 https://finance.yahoo.com/news/stock-market-to... \n",
|
240 |
+
"1 https://www.cnbc.com/2024/04/23/5-things-to-kn... \n",
|
241 |
+
"2 https://seekingalpha.com/article/4685243-wall-... \n",
|
242 |
+
"\n",
|
243 |
+
" image_url language \\\n",
|
244 |
+
"0 https://s.yimg.com/ny/api/res/1.2/mqjC0VUO61dY... en \n",
|
245 |
+
"1 https://image.cnbcfm.com/api/v1/image/10692170... en \n",
|
246 |
+
"2 https://static.seekingalpha.com/cdn/s3/uploads... en \n",
|
247 |
+
"\n",
|
248 |
+
" published_at source relevance_score \\\n",
|
249 |
+
"0 2024-04-23T11:22:53.000000Z finance.yahoo.com None \n",
|
250 |
+
"1 2024-04-23T11:16:00.000000Z cnbc.com None \n",
|
251 |
+
"2 2024-04-23T11:00:00.000000Z seekingalpha.com None \n",
|
252 |
+
"\n",
|
253 |
+
" entities \\\n",
|
254 |
+
"0 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
|
255 |
+
"1 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
|
256 |
+
"2 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
|
257 |
+
"\n",
|
258 |
+
" similar \n",
|
259 |
+
"0 [{'uuid': '01cd65de-530d-407a-96c9-5b3359e98a0... \n",
|
260 |
+
"1 [] \n",
|
261 |
+
"2 [] "
|
262 |
+
]
|
263 |
+
},
|
264 |
+
"execution_count": 40,
|
265 |
+
"metadata": {},
|
266 |
+
"output_type": "execute_result"
|
267 |
+
}
|
268 |
+
],
|
269 |
+
"source": [
|
270 |
+
"tesla_news_df"
|
271 |
+
]
|
272 |
},
|
273 |
{
|
274 |
"cell_type": "code",
|
news_experimenting1.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|