mtzeve committed on
Commit
18a27bc
·
1 Parent(s): 906df51
feature_pipeline.ipynb CHANGED
@@ -111,7 +111,7 @@
111
  ],
112
  "source": [
113
  "# Define your file path and name\n",
114
- "file_path = '/Users/manos/Documents/BDS/MLops_mod/TSLA_stock_price.csv' # Customize the path and filename\n",
115
  "\n",
116
  "# Save the DataFrame to CSV\n",
117
  "data.to_csv(file_path)\n",
@@ -503,7 +503,7 @@
503
  ],
504
  "source": [
505
  "# Create feature group for historical news data\n",
506
- "news_df = pd.read_csv('/Users/manos/Documents/BDS/MLops_mod/news_articles.csv')\n",
507
  "\n",
508
  "news_sentiment_fg = fs.get_or_create_feature_group(\n",
509
  " name='news_sentiment',\n",
 
111
  ],
112
  "source": [
113
  "# Define your file path and name\n",
114
+ "file_path = 'TSLA_stock_price.csv' # Customize the path and filename\n",
115
  "\n",
116
  "# Save the DataFrame to CSV\n",
117
  "data.to_csv(file_path)\n",
 
503
  ],
504
  "source": [
505
  "# Create feature group for historical news data\n",
506
+ "news_df = pd.read_csv('news_articles.csv')\n",
507
  "\n",
508
  "news_sentiment_fg = fs.get_or_create_feature_group(\n",
509
  " name='news_sentiment',\n",
feature_pipeline.py CHANGED
@@ -27,7 +27,7 @@ meta_data
27
 
28
  # %%
29
  # Define your file path and name
30
- file_path = '/Users/manos/Documents/BDS/MLops_mod/TSLA_stock_price.csv' # Customize the path and filename
31
 
32
  # Save the DataFrame to CSV
33
  data.to_csv(file_path)
@@ -85,7 +85,7 @@ tesla_fg.insert(tsla_df, write_options={"wait_for_job" : False})
85
 
86
  # %%
87
  # Create feature group for historical news data
88
- news_df = pd.read_csv('/Users/manos/Documents/BDS/MLops_mod/news_articles.csv')
89
 
90
  news_sentiment_fg = fs.get_or_create_feature_group(
91
  name='news_sentiment',
 
27
 
28
  # %%
29
  # Define your file path and name
30
+ file_path = 'TSLA_stock_price.csv' # Customize the path and filename
31
 
32
  # Save the DataFrame to CSV
33
  data.to_csv(file_path)
 
85
 
86
  # %%
87
  # Create feature group for historical news data
88
+ news_df = pd.read_csv('news_articles.csv')
89
 
90
  news_sentiment_fg = fs.get_or_create_feature_group(
91
  name='news_sentiment',
historical_news.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 2,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -17,7 +17,7 @@
17
  },
18
  {
19
  "cell_type": "code",
20
- "execution_count": 3,
21
  "metadata": {},
22
  "outputs": [
23
  {
@@ -26,7 +26,7 @@
26
  "True"
27
  ]
28
  },
29
- "execution_count": 3,
30
  "metadata": {},
31
  "output_type": "execute_result"
32
  }
@@ -37,30 +37,30 @@
37
  },
38
  {
39
  "cell_type": "code",
40
- "execution_count": 5,
41
  "metadata": {},
42
  "outputs": [
43
  {
44
  "name": "stdout",
45
  "output_type": "stream",
46
  "text": [
47
- "Fetched 50 articles from 2022-04-01 to 2022-05-21\n",
48
- "Fetched 50 articles from 2022-05-22 to 2022-07-11\n",
49
- "Fetched 50 articles from 2022-07-12 to 2022-08-31\n",
50
- "Fetched 50 articles from 2022-09-01 to 2022-10-21\n",
51
- "Fetched 50 articles from 2022-10-22 to 2022-12-11\n",
52
  "Rate limit reached. Waiting to retry...\n",
53
- "Fetched 50 articles from 2022-12-12 to 2023-01-31\n",
54
- "Fetched 50 articles from 2023-02-01 to 2023-03-23\n",
55
- "Fetched 50 articles from 2023-03-24 to 2023-05-13\n",
56
- "Fetched 50 articles from 2023-05-14 to 2023-07-03\n",
57
- "Fetched 50 articles from 2023-07-04 to 2023-08-23\n",
58
  "Rate limit reached. Waiting to retry...\n",
59
- "Fetched 50 articles from 2023-08-24 to 2023-10-13\n",
60
- "Fetched 50 articles from 2023-10-14 to 2023-12-03\n",
61
- "Fetched 50 articles from 2023-12-04 to 2024-01-23\n",
62
- "Fetched 50 articles from 2024-01-24 to 2024-03-14\n",
63
- "Fetched 50 articles from 2024-03-15 to 2024-04-01\n",
64
  "Total articles fetched: 750\n"
65
  ]
66
  }
@@ -120,7 +120,7 @@
120
  },
121
  {
122
  "cell_type": "code",
123
- "execution_count": 6,
124
  "metadata": {},
125
  "outputs": [],
126
  "source": [
@@ -130,7 +130,7 @@
130
  },
131
  {
132
  "cell_type": "code",
133
- "execution_count": 7,
134
  "metadata": {},
135
  "outputs": [
136
  {
@@ -138,25 +138,25 @@
138
  "output_type": "stream",
139
  "text": [
140
  "<class 'pandas.core.frame.DataFrame'>\n",
141
- "DatetimeIndex: 712 entries, 1970-01-01 00:00:00 to 1970-01-01 00:00:00.000000711\n",
142
  "Data columns (total 13 columns):\n",
143
  " # Column Non-Null Count Dtype \n",
144
  "--- ------ -------------- ----- \n",
145
- " 0 date 712 non-null object \n",
146
- " 1 id 712 non-null object \n",
147
- " 2 publisher 712 non-null object \n",
148
- " 3 title 712 non-null object \n",
149
- " 4 author 712 non-null object \n",
150
- " 5 article_url 712 non-null object \n",
151
- " 6 tickers 712 non-null object \n",
152
- " 7 amp_url 712 non-null object \n",
153
- " 8 image_url 712 non-null object \n",
154
- " 9 description 712 non-null object \n",
155
- " 10 keywords 712 non-null object \n",
156
- " 11 sentiment 712 non-null float64\n",
157
- " 12 time 712 non-null object \n",
158
  "dtypes: float64(1), object(12)\n",
159
- "memory usage: 77.9+ KB\n"
160
  ]
161
  }
162
  ],
@@ -166,7 +166,7 @@
166
  },
167
  {
168
  "cell_type": "code",
169
- "execution_count": 8,
170
  "metadata": {},
171
  "outputs": [
172
  {
@@ -208,83 +208,83 @@
208
  " <tbody>\n",
209
  " <tr>\n",
210
  " <th>1970-01-01 00:00:00.000000000</th>\n",
211
- " <td>2022-05-21</td>\n",
212
- " <td>rHYPs-UNybcfTGof0g3bkqHkaA4KA5z1UbSPJ7gmaaQ</td>\n",
213
  " <td>{'name': 'Benzinga', 'homepage_url': 'https://...</td>\n",
214
- " <td>Elon Musk Says Twitter Refusing To Cough Up Sp...</td>\n",
215
- " <td>Bibhu Pattnaik</td>\n",
216
- " <td>https://www.benzinga.com/news/22/05/27337474/e...</td>\n",
217
- " <td>[TSLA, TWTR]</td>\n",
218
- " <td>https://www.benzinga.com/amp/content/27337474</td>\n",
219
  " <td>https://cdn.benzinga.com/files/images/story/20...</td>\n",
220
- " <td>Tesla Inc (NASDAQ: TSLA) CEO Elon Musk seems t...</td>\n",
221
- " <td>[News, Top Stories, Tech]</td>\n",
222
- " <td>-0.064242</td>\n",
223
- " <td>19:39:28</td>\n",
224
  " </tr>\n",
225
  " <tr>\n",
226
  " <th>1970-01-01 00:00:00.000000001</th>\n",
227
- " <td>2022-05-21</td>\n",
228
- " <td>eYUSHsraFoKd8y4OiDtesI8pSbne2M4XDzjq7vmVRfw</td>\n",
229
- " <td>{'name': 'The Motley Fool', 'homepage_url': 'h...</td>\n",
230
- " <td>Why Twitter Stock Fell This Week</td>\n",
231
- " <td>newsfeedback@fool.com (Daniel Sparks)</td>\n",
232
- " <td>https://www.fool.com/investing/2022/05/21/why-...</td>\n",
233
- " <td>[TWTR, TSLA]</td>\n",
234
  " <td>No URL provided</td>\n",
235
- " <td>https://g.foolcdn.com/editorial/images/681359/...</td>\n",
236
- " <td>Is the company's $44 billion deal at risk of n...</td>\n",
237
- " <td>[investing]</td>\n",
238
- " <td>0.000000</td>\n",
239
- " <td>12:16:04</td>\n",
240
- " </tr>\n",
241
- " <tr>\n",
242
- " <th>1970-01-01 00:00:00.000000002</th>\n",
243
- " <td>2022-05-21</td>\n",
244
- " <td>LMfdls7oU_GPgoSbFgq2xvW2bj0QwgWWD8SwHVAMEL4</td>\n",
245
- " <td>{'name': 'Seeking Alpha', 'homepage_url': 'htt...</td>\n",
246
- " <td>S&amp;P 500 Earnings Update: Earnings Yield Above ...</td>\n",
247
- " <td>Brian Gilmartin, CFA</td>\n",
248
- " <td>https://seekingalpha.com/article/4513558-sp500...</td>\n",
249
- " <td>[WMT, TSLA, TGT, BAPR, BAUG, BJUL, BJUN, BMAR,...</td>\n",
250
- " <td>https://seekingalpha.com/amp/article/4513558-s...</td>\n",
251
- " <td>https://static.seekingalpha.com/cdn/s3/uploads...</td>\n",
252
- " <td>The S&amp;P 500 earnings yield is 6.03% vs last we...</td>\n",
253
  " <td>No keywords</td>\n",
254
- " <td>0.000000</td>\n",
255
- " <td>04:00:00</td>\n",
256
  " </tr>\n",
257
  " <tr>\n",
258
- " <th>1970-01-01 00:00:00.000000003</th>\n",
259
- " <td>2022-05-20</td>\n",
260
- " <td>PsNkJVaCAXki25-hZcJk2irqOky8s5cNOypZqHqgrz4</td>\n",
261
  " <td>{'name': 'The Motley Fool', 'homepage_url': 'h...</td>\n",
262
- " <td>Why Tesla Stock Was Slammed on Friday</td>\n",
263
- " <td>newsfeedback@fool.com (Daniel Sparks)</td>\n",
264
- " <td>https://www.fool.com/investing/2022/05/20/why-...</td>\n",
265
- " <td>[TSLA, TWTR]</td>\n",
266
  " <td>No URL provided</td>\n",
267
- " <td>https://g.foolcdn.com/editorial/images/681336/...</td>\n",
268
- " <td>The electric-car maker's stock has been gettin...</td>\n",
269
  " <td>[investing]</td>\n",
270
- " <td>0.187500</td>\n",
271
- " <td>22:56:26</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
  " </tr>\n",
273
  " <tr>\n",
274
  " <th>1970-01-01 00:00:00.000000004</th>\n",
275
- " <td>2022-05-20</td>\n",
276
- " <td>nMdkN0UNxwOrP0VLjCpYzR6xq-rzxS-5uM3qKEQsftE</td>\n",
277
- " <td>{'name': 'Benzinga', 'homepage_url': 'https://...</td>\n",
278
- " <td>S&amp;P 500 Falls For 7th Consecutive Week Amid On...</td>\n",
279
- " <td>Henry Khederian</td>\n",
280
- " <td>https://www.benzinga.com/news/22/05/27332539/s...</td>\n",
281
- " <td>[DIA, SPY, ROST, AMT, DE, LLY, VFC, TSLA, QQQ]</td>\n",
282
- " <td>https://www.benzinga.com/amp/content/27332539</td>\n",
283
- " <td>https://cdn.benzinga.com/files/images/story/20...</td>\n",
284
- " <td>U.S. indices rebounded off session lows Friday...</td>\n",
285
- " <td>[News, After-Hours Center, Markets, Movers, Tr...</td>\n",
286
- " <td>0.096032</td>\n",
287
- " <td>20:17:20</td>\n",
288
  " </tr>\n",
289
  " </tbody>\n",
290
  "</table>\n",
@@ -292,91 +292,84 @@
292
  ],
293
  "text/plain": [
294
  " date \\\n",
295
- "1970-01-01 00:00:00.000000000 2022-05-21 \n",
296
- "1970-01-01 00:00:00.000000001 2022-05-21 \n",
297
- "1970-01-01 00:00:00.000000002 2022-05-21 \n",
298
- "1970-01-01 00:00:00.000000003 2022-05-20 \n",
299
- "1970-01-01 00:00:00.000000004 2022-05-20 \n",
300
  "\n",
301
  " id \\\n",
302
- "1970-01-01 00:00:00.000000000 rHYPs-UNybcfTGof0g3bkqHkaA4KA5z1UbSPJ7gmaaQ \n",
303
- "1970-01-01 00:00:00.000000001 eYUSHsraFoKd8y4OiDtesI8pSbne2M4XDzjq7vmVRfw \n",
304
- "1970-01-01 00:00:00.000000002 LMfdls7oU_GPgoSbFgq2xvW2bj0QwgWWD8SwHVAMEL4 \n",
305
- "1970-01-01 00:00:00.000000003 PsNkJVaCAXki25-hZcJk2irqOky8s5cNOypZqHqgrz4 \n",
306
- "1970-01-01 00:00:00.000000004 nMdkN0UNxwOrP0VLjCpYzR6xq-rzxS-5uM3qKEQsftE \n",
307
  "\n",
308
  " publisher \\\n",
309
  "1970-01-01 00:00:00.000000000 {'name': 'Benzinga', 'homepage_url': 'https://... \n",
310
- "1970-01-01 00:00:00.000000001 {'name': 'The Motley Fool', 'homepage_url': 'h... \n",
311
- "1970-01-01 00:00:00.000000002 {'name': 'Seeking Alpha', 'homepage_url': 'htt... \n",
312
- "1970-01-01 00:00:00.000000003 {'name': 'The Motley Fool', 'homepage_url': 'h... \n",
313
- "1970-01-01 00:00:00.000000004 {'name': 'Benzinga', 'homepage_url': 'https://... \n",
314
  "\n",
315
  " title \\\n",
316
- "1970-01-01 00:00:00.000000000 Elon Musk Says Twitter Refusing To Cough Up Sp... \n",
317
- "1970-01-01 00:00:00.000000001 Why Twitter Stock Fell This Week \n",
318
- "1970-01-01 00:00:00.000000002 S&P 500 Earnings Update: Earnings Yield Above ... \n",
319
- "1970-01-01 00:00:00.000000003 Why Tesla Stock Was Slammed on Friday \n",
320
- "1970-01-01 00:00:00.000000004 S&P 500 Falls For 7th Consecutive Week Amid On... \n",
321
  "\n",
322
- " author \\\n",
323
- "1970-01-01 00:00:00.000000000 Bibhu Pattnaik \n",
324
- "1970-01-01 00:00:00.000000001 newsfeedback@fool.com (Daniel Sparks) \n",
325
- "1970-01-01 00:00:00.000000002 Brian Gilmartin, CFA \n",
326
- "1970-01-01 00:00:00.000000003 newsfeedback@fool.com (Daniel Sparks) \n",
327
- "1970-01-01 00:00:00.000000004 Henry Khederian \n",
328
  "\n",
329
  " article_url \\\n",
330
- "1970-01-01 00:00:00.000000000 https://www.benzinga.com/news/22/05/27337474/e... \n",
331
- "1970-01-01 00:00:00.000000001 https://www.fool.com/investing/2022/05/21/why-... \n",
332
- "1970-01-01 00:00:00.000000002 https://seekingalpha.com/article/4513558-sp500... \n",
333
- "1970-01-01 00:00:00.000000003 https://www.fool.com/investing/2022/05/20/why-... \n",
334
- "1970-01-01 00:00:00.000000004 https://www.benzinga.com/news/22/05/27332539/s... \n",
335
  "\n",
336
- " tickers \\\n",
337
- "1970-01-01 00:00:00.000000000 [TSLA, TWTR] \n",
338
- "1970-01-01 00:00:00.000000001 [TWTR, TSLA] \n",
339
- "1970-01-01 00:00:00.000000002 [WMT, TSLA, TGT, BAPR, BAUG, BJUL, BJUN, BMAR,... \n",
340
- "1970-01-01 00:00:00.000000003 [TSLA, TWTR] \n",
341
- "1970-01-01 00:00:00.000000004 [DIA, SPY, ROST, AMT, DE, LLY, VFC, TSLA, QQQ] \n",
342
  "\n",
343
  " amp_url \\\n",
344
- "1970-01-01 00:00:00.000000000 https://www.benzinga.com/amp/content/27337474 \n",
345
  "1970-01-01 00:00:00.000000001 No URL provided \n",
346
- "1970-01-01 00:00:00.000000002 https://seekingalpha.com/amp/article/4513558-s... \n",
347
- "1970-01-01 00:00:00.000000003 No URL provided \n",
348
- "1970-01-01 00:00:00.000000004 https://www.benzinga.com/amp/content/27332539 \n",
349
  "\n",
350
  " image_url \\\n",
351
  "1970-01-01 00:00:00.000000000 https://cdn.benzinga.com/files/images/story/20... \n",
352
- "1970-01-01 00:00:00.000000001 https://g.foolcdn.com/editorial/images/681359/... \n",
353
- "1970-01-01 00:00:00.000000002 https://static.seekingalpha.com/cdn/s3/uploads... \n",
354
- "1970-01-01 00:00:00.000000003 https://g.foolcdn.com/editorial/images/681336/... \n",
355
- "1970-01-01 00:00:00.000000004 https://cdn.benzinga.com/files/images/story/20... \n",
356
  "\n",
357
  " description \\\n",
358
- "1970-01-01 00:00:00.000000000 Tesla Inc (NASDAQ: TSLA) CEO Elon Musk seems t... \n",
359
- "1970-01-01 00:00:00.000000001 Is the company's $44 billion deal at risk of n... \n",
360
- "1970-01-01 00:00:00.000000002 The S&P 500 earnings yield is 6.03% vs last we... \n",
361
- "1970-01-01 00:00:00.000000003 The electric-car maker's stock has been gettin... \n",
362
- "1970-01-01 00:00:00.000000004 U.S. indices rebounded off session lows Friday... \n",
363
- "\n",
364
- " keywords \\\n",
365
- "1970-01-01 00:00:00.000000000 [News, Top Stories, Tech] \n",
366
- "1970-01-01 00:00:00.000000001 [investing] \n",
367
- "1970-01-01 00:00:00.000000002 No keywords \n",
368
- "1970-01-01 00:00:00.000000003 [investing] \n",
369
- "1970-01-01 00:00:00.000000004 [News, After-Hours Center, Markets, Movers, Tr... \n",
370
- "\n",
371
- " sentiment time \n",
372
- "1970-01-01 00:00:00.000000000 -0.064242 19:39:28 \n",
373
- "1970-01-01 00:00:00.000000001 0.000000 12:16:04 \n",
374
- "1970-01-01 00:00:00.000000002 0.000000 04:00:00 \n",
375
- "1970-01-01 00:00:00.000000003 0.187500 22:56:26 \n",
376
- "1970-01-01 00:00:00.000000004 0.096032 20:17:20 "
377
  ]
378
  },
379
- "execution_count": 8,
380
  "metadata": {},
381
  "output_type": "execute_result"
382
  }
@@ -387,7 +380,7 @@
387
  },
388
  {
389
  "cell_type": "code",
390
- "execution_count": 9,
391
  "metadata": {},
392
  "outputs": [],
393
  "source": [
@@ -396,7 +389,7 @@
396
  },
397
  {
398
  "cell_type": "code",
399
- "execution_count": 10,
400
  "metadata": {},
401
  "outputs": [],
402
  "source": [
@@ -405,7 +398,7 @@
405
  },
406
  {
407
  "cell_type": "code",
408
- "execution_count": 11,
409
  "metadata": {},
410
  "outputs": [],
411
  "source": [
@@ -414,7 +407,7 @@
414
  },
415
  {
416
  "cell_type": "code",
417
- "execution_count": 12,
418
  "metadata": {},
419
  "outputs": [],
420
  "source": [
@@ -423,7 +416,7 @@
423
  },
424
  {
425
  "cell_type": "code",
426
- "execution_count": 13,
427
  "metadata": {},
428
  "outputs": [
429
  {
@@ -465,89 +458,316 @@
465
  " </thead>\n",
466
  " <tbody>\n",
467
  " <tr>\n",
468
- " <th>1970-01-01 00:00:00.000000711</th>\n",
469
- " <td>2024-03-27</td>\n",
470
- " <td>rD3Jh821u6EVUukLdPuuvheL8Iry8BIAHdPj15YgbJ4</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  " <td>{'name': 'Benzinga', 'homepage_url': 'https://...</td>\n",
472
- " <td>Tesla-CATL 'Power Couple' Can Recharge US EV M...</td>\n",
473
- " <td>Anan Ashraf</td>\n",
474
  " <td>https://www.benzinga.com/analyst-ratings/analy...</td>\n",
475
  " <td>[TSLA]</td>\n",
476
- " <td>https://www.benzinga.com/amp/content/37950620</td>\n",
477
  " <td>https://cdn.benzinga.com/files/images/story/20...</td>\n",
478
- " <td>Morgan Stanley analyst and Tesla Inc (NASDAQ:T...</td>\n",
479
- " <td>[News, Analyst Color, Tech]</td>\n",
480
- " <td>0.010000</td>\n",
481
- " <td>03:22:17</td>\n",
482
- " <td>0.010000</td>\n",
483
  " </tr>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  " <tr>\n",
485
- " <th>1970-01-01 00:00:00.000000710</th>\n",
486
- " <td>2024-03-27</td>\n",
487
- " <td>gPdvI5l7YFrkcBSZSK_ZgKkYlay8UIuDpFlOroyG7d4</td>\n",
488
- " <td>{'name': 'Benzinga', 'homepage_url': 'https://...</td>\n",
489
- " <td>Investor Sentiment Falls Further, S&amp;P 500 Fall...</td>\n",
490
- " <td>Avi Kapoor</td>\n",
491
- " <td>https://www.benzinga.com/news/earnings/24/03/3...</td>\n",
492
- " <td>[CTAS, STX, CCL, TSLA, RH]</td>\n",
493
- " <td>https://www.benzinga.com/amp/content/37951150</td>\n",
494
- " <td>https://cdn.benzinga.com/files/images/story/20...</td>\n",
495
- " <td>The CNN Money Fear and Greed index showed a fu...</td>\n",
496
- " <td>[News, Earnings, Pre-Market Outlook, Markets, ...</td>\n",
497
- " <td>0.157812</td>\n",
498
- " <td>08:00:27</td>\n",
499
- " <td>0.046953</td>\n",
500
  " </tr>\n",
501
  " <tr>\n",
502
- " <th>1970-01-01 00:00:00.000000709</th>\n",
503
- " <td>2024-03-27</td>\n",
504
- " <td>aNETwfELk3fnJHMejDxAXfpu931S-zaC93cs-pD7cS0</td>\n",
505
- " <td>{'name': 'The Motley Fool', 'homepage_url': 'h...</td>\n",
506
- " <td>Forget Tesla: 1 Unstoppable Artificial Intelli...</td>\n",
507
- " <td>newsfeedback@fool.com (Anthony Di Pizio)</td>\n",
508
- " <td>https://www.fool.com/investing/2024/03/27/forg...</td>\n",
509
- " <td>[ORCL, TSLA, META, MSFT, GOOGL, AAPL, AMZN, NV...</td>\n",
510
- " <td>No URL provided</td>\n",
511
- " <td>https://g.foolcdn.com/editorial/images/770375/...</td>\n",
512
- " <td>Tesla stock is down 31% already in 2024, jeopa...</td>\n",
513
- " <td>[investing]</td>\n",
514
- " <td>0.172222</td>\n",
515
- " <td>08:27:00</td>\n",
516
- " <td>0.078270</td>\n",
517
  " </tr>\n",
518
  " <tr>\n",
519
- " <th>1970-01-01 00:00:00.000000708</th>\n",
520
- " <td>2024-03-27</td>\n",
521
- " <td>QzI8mX8__zTvRWgwqgSYPjAD49Wi5sqsn1-5gYQoNXU</td>\n",
522
  " <td>{'name': 'The Motley Fool', 'homepage_url': 'h...</td>\n",
523
- " <td>This Fantastic Stock Has Outperformed Tesla in...</td>\n",
524
- " <td>newsfeedback@fool.com (Neil Rozenbaum)</td>\n",
525
- " <td>https://www.fool.com/investing/2024/03/27/this...</td>\n",
526
- " <td>[LULU, TSLA]</td>\n",
527
  " <td>No URL provided</td>\n",
528
- " <td>https://g.foolcdn.com/editorial/images/770691/...</td>\n",
529
- " <td>Lululemon stock has crashed 20% since reportin...</td>\n",
530
  " <td>[investing]</td>\n",
531
- " <td>0.000000</td>\n",
532
- " <td>09:30:00</td>\n",
533
- " <td>0.058703</td>\n",
534
  " </tr>\n",
535
  " <tr>\n",
536
- " <th>1970-01-01 00:00:00.000000707</th>\n",
537
- " <td>2024-03-27</td>\n",
538
- " <td>UubTuww0IbuYZIBCvNK2f_cK_xfOImnbHahPNHEPSVY</td>\n",
539
- " <td>{'name': 'The Motley Fool', 'homepage_url': 'h...</td>\n",
540
- " <td>Forget Tesla: I Think This Stock Should Replac...</td>\n",
541
- " <td>newsfeedback@fool.com (Ryan Vanzo)</td>\n",
542
- " <td>https://www.fool.com/investing/2024/03/27/forg...</td>\n",
543
- " <td>[V, TSLA, META, GOOGL, NVDA, GOOG]</td>\n",
544
  " <td>No URL provided</td>\n",
545
- " <td>https://g.foolcdn.com/editorial/images/769403/...</td>\n",
546
- " <td>Tesla stock has lost 30% of its value in three...</td>\n",
547
- " <td>[investing]</td>\n",
548
- " <td>1.000000</td>\n",
549
- " <td>09:50:00</td>\n",
550
- " <td>0.294027</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
  " </tr>\n",
552
  " </tbody>\n",
553
  "</table>\n",
@@ -555,97 +775,182 @@
555
  ],
556
  "text/plain": [
557
  " date \\\n",
558
- "1970-01-01 00:00:00.000000711 2024-03-27 \n",
559
- "1970-01-01 00:00:00.000000710 2024-03-27 \n",
560
- "1970-01-01 00:00:00.000000709 2024-03-27 \n",
561
- "1970-01-01 00:00:00.000000708 2024-03-27 \n",
562
- "1970-01-01 00:00:00.000000707 2024-03-27 \n",
563
  "\n",
564
  " id \\\n",
565
- "1970-01-01 00:00:00.000000711 rD3Jh821u6EVUukLdPuuvheL8Iry8BIAHdPj15YgbJ4 \n",
566
- "1970-01-01 00:00:00.000000710 gPdvI5l7YFrkcBSZSK_ZgKkYlay8UIuDpFlOroyG7d4 \n",
567
- "1970-01-01 00:00:00.000000709 aNETwfELk3fnJHMejDxAXfpu931S-zaC93cs-pD7cS0 \n",
568
- "1970-01-01 00:00:00.000000708 QzI8mX8__zTvRWgwqgSYPjAD49Wi5sqsn1-5gYQoNXU \n",
569
- "1970-01-01 00:00:00.000000707 UubTuww0IbuYZIBCvNK2f_cK_xfOImnbHahPNHEPSVY \n",
570
  "\n",
571
  " publisher \\\n",
572
- "1970-01-01 00:00:00.000000711 {'name': 'Benzinga', 'homepage_url': 'https://... \n",
573
- "1970-01-01 00:00:00.000000710 {'name': 'Benzinga', 'homepage_url': 'https://... \n",
574
- "1970-01-01 00:00:00.000000709 {'name': 'The Motley Fool', 'homepage_url': 'h... \n",
575
- "1970-01-01 00:00:00.000000708 {'name': 'The Motley Fool', 'homepage_url': 'h... \n",
576
- "1970-01-01 00:00:00.000000707 {'name': 'The Motley Fool', 'homepage_url': 'h... \n",
577
  "\n",
578
  " title \\\n",
579
- "1970-01-01 00:00:00.000000711 Tesla-CATL 'Power Couple' Can Recharge US EV M... \n",
580
- "1970-01-01 00:00:00.000000710 Investor Sentiment Falls Further, S&P 500 Fall... \n",
581
- "1970-01-01 00:00:00.000000709 Forget Tesla: 1 Unstoppable Artificial Intelli... \n",
582
- "1970-01-01 00:00:00.000000708 This Fantastic Stock Has Outperformed Tesla in... \n",
583
- "1970-01-01 00:00:00.000000707 Forget Tesla: I Think This Stock Should Replac... \n",
584
- "\n",
585
- " author \\\n",
586
- "1970-01-01 00:00:00.000000711 Anan Ashraf \n",
587
- "1970-01-01 00:00:00.000000710 Avi Kapoor \n",
588
- "1970-01-01 00:00:00.000000709 newsfeedback@fool.com (Anthony Di Pizio) \n",
589
- "1970-01-01 00:00:00.000000708 newsfeedback@fool.com (Neil Rozenbaum) \n",
590
- "1970-01-01 00:00:00.000000707 newsfeedback@fool.com (Ryan Vanzo) \n",
591
  "\n",
592
  " article_url \\\n",
593
- "1970-01-01 00:00:00.000000711 https://www.benzinga.com/analyst-ratings/analy... \n",
594
- "1970-01-01 00:00:00.000000710 https://www.benzinga.com/news/earnings/24/03/3... \n",
595
- "1970-01-01 00:00:00.000000709 https://www.fool.com/investing/2024/03/27/forg... \n",
596
- "1970-01-01 00:00:00.000000708 https://www.fool.com/investing/2024/03/27/this... \n",
597
- "1970-01-01 00:00:00.000000707 https://www.fool.com/investing/2024/03/27/forg... \n",
598
  "\n",
599
- " tickers \\\n",
600
- "1970-01-01 00:00:00.000000711 [TSLA] \n",
601
- "1970-01-01 00:00:00.000000710 [CTAS, STX, CCL, TSLA, RH] \n",
602
- "1970-01-01 00:00:00.000000709 [ORCL, TSLA, META, MSFT, GOOGL, AAPL, AMZN, NV... \n",
603
- "1970-01-01 00:00:00.000000708 [LULU, TSLA] \n",
604
- "1970-01-01 00:00:00.000000707 [V, TSLA, META, GOOGL, NVDA, GOOG] \n",
605
- "\n",
606
- " amp_url \\\n",
607
- "1970-01-01 00:00:00.000000711 https://www.benzinga.com/amp/content/37950620 \n",
608
- "1970-01-01 00:00:00.000000710 https://www.benzinga.com/amp/content/37951150 \n",
609
- "1970-01-01 00:00:00.000000709 No URL provided \n",
610
- "1970-01-01 00:00:00.000000708 No URL provided \n",
611
- "1970-01-01 00:00:00.000000707 No URL provided \n",
612
  "\n",
613
  " image_url \\\n",
614
- "1970-01-01 00:00:00.000000711 https://cdn.benzinga.com/files/images/story/20... \n",
615
- "1970-01-01 00:00:00.000000710 https://cdn.benzinga.com/files/images/story/20... \n",
616
- "1970-01-01 00:00:00.000000709 https://g.foolcdn.com/editorial/images/770375/... \n",
617
- "1970-01-01 00:00:00.000000708 https://g.foolcdn.com/editorial/images/770691/... \n",
618
- "1970-01-01 00:00:00.000000707 https://g.foolcdn.com/editorial/images/769403/... \n",
619
  "\n",
620
  " description \\\n",
621
- "1970-01-01 00:00:00.000000711 Morgan Stanley analyst and Tesla Inc (NASDAQ:T... \n",
622
- "1970-01-01 00:00:00.000000710 The CNN Money Fear and Greed index showed a fu... \n",
623
- "1970-01-01 00:00:00.000000709 Tesla stock is down 31% already in 2024, jeopa... \n",
624
- "1970-01-01 00:00:00.000000708 Lululemon stock has crashed 20% since reportin... \n",
625
- "1970-01-01 00:00:00.000000707 Tesla stock has lost 30% of its value in three... \n",
626
- "\n",
627
- " keywords \\\n",
628
- "1970-01-01 00:00:00.000000711 [News, Analyst Color, Tech] \n",
629
- "1970-01-01 00:00:00.000000710 [News, Earnings, Pre-Market Outlook, Markets, ... \n",
630
- "1970-01-01 00:00:00.000000709 [investing] \n",
631
- "1970-01-01 00:00:00.000000708 [investing] \n",
632
- "1970-01-01 00:00:00.000000707 [investing] \n",
633
  "\n",
634
- " sentiment time exp_mean_7_days \n",
635
- "1970-01-01 00:00:00.000000711 0.010000 03:22:17 0.010000 \n",
636
- "1970-01-01 00:00:00.000000710 0.157812 08:00:27 0.046953 \n",
637
- "1970-01-01 00:00:00.000000709 0.172222 08:27:00 0.078270 \n",
638
- "1970-01-01 00:00:00.000000708 0.000000 09:30:00 0.058703 \n",
639
- "1970-01-01 00:00:00.000000707 1.000000 09:50:00 0.294027 "
 
 
 
 
 
 
 
640
  ]
641
  },
642
- "execution_count": 13,
643
  "metadata": {},
644
  "output_type": "execute_result"
645
  }
646
  ],
647
  "source": [
648
- "df_processed.head()"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
649
  ]
650
  },
651
  {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 14,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
17
  },
18
  {
19
  "cell_type": "code",
20
+ "execution_count": 15,
21
  "metadata": {},
22
  "outputs": [
23
  {
 
26
  "True"
27
  ]
28
  },
29
+ "execution_count": 15,
30
  "metadata": {},
31
  "output_type": "execute_result"
32
  }
 
37
  },
38
  {
39
  "cell_type": "code",
40
+ "execution_count": 16,
41
  "metadata": {},
42
  "outputs": [
43
  {
44
  "name": "stdout",
45
  "output_type": "stream",
46
  "text": [
47
+ "Fetched 50 articles from 2022-05-03 to 2022-06-22\n",
48
+ "Fetched 50 articles from 2022-06-23 to 2022-08-12\n",
49
+ "Fetched 50 articles from 2022-08-13 to 2022-10-02\n",
50
+ "Fetched 50 articles from 2022-10-03 to 2022-11-22\n",
51
+ "Fetched 50 articles from 2022-11-23 to 2023-01-12\n",
52
  "Rate limit reached. Waiting to retry...\n",
53
+ "Fetched 50 articles from 2023-01-13 to 2023-03-04\n",
54
+ "Fetched 50 articles from 2023-03-05 to 2023-04-24\n",
55
+ "Fetched 50 articles from 2023-04-25 to 2023-06-14\n",
56
+ "Fetched 50 articles from 2023-06-15 to 2023-08-04\n",
57
+ "Fetched 50 articles from 2023-08-05 to 2023-09-24\n",
58
  "Rate limit reached. Waiting to retry...\n",
59
+ "Fetched 50 articles from 2023-09-25 to 2023-11-14\n",
60
+ "Fetched 50 articles from 2023-11-15 to 2024-01-04\n",
61
+ "Fetched 50 articles from 2024-01-05 to 2024-02-24\n",
62
+ "Fetched 50 articles from 2024-02-25 to 2024-04-15\n",
63
+ "Fetched 50 articles from 2024-04-16 to 2024-05-02\n",
64
  "Total articles fetched: 750\n"
65
  ]
66
  }
 
120
  },
121
  {
122
  "cell_type": "code",
123
+ "execution_count": 17,
124
  "metadata": {},
125
  "outputs": [],
126
  "source": [
 
130
  },
131
  {
132
  "cell_type": "code",
133
+ "execution_count": 18,
134
  "metadata": {},
135
  "outputs": [
136
  {
 
138
  "output_type": "stream",
139
  "text": [
140
  "<class 'pandas.core.frame.DataFrame'>\n",
141
+ "DatetimeIndex: 720 entries, 1970-01-01 00:00:00 to 1970-01-01 00:00:00.000000719\n",
142
  "Data columns (total 13 columns):\n",
143
  " # Column Non-Null Count Dtype \n",
144
  "--- ------ -------------- ----- \n",
145
+ " 0 date 720 non-null object \n",
146
+ " 1 id 720 non-null object \n",
147
+ " 2 publisher 720 non-null object \n",
148
+ " 3 title 720 non-null object \n",
149
+ " 4 author 720 non-null object \n",
150
+ " 5 article_url 720 non-null object \n",
151
+ " 6 tickers 720 non-null object \n",
152
+ " 7 amp_url 720 non-null object \n",
153
+ " 8 image_url 720 non-null object \n",
154
+ " 9 description 720 non-null object \n",
155
+ " 10 keywords 720 non-null object \n",
156
+ " 11 sentiment 720 non-null float64\n",
157
+ " 12 time 720 non-null object \n",
158
  "dtypes: float64(1), object(12)\n",
159
+ "memory usage: 78.8+ KB\n"
160
  ]
161
  }
162
  ],
 
166
  },
167
  {
168
  "cell_type": "code",
169
+ "execution_count": 19,
170
  "metadata": {},
171
  "outputs": [
172
  {
 
208
  " <tbody>\n",
209
  " <tr>\n",
210
  " <th>1970-01-01 00:00:00.000000000</th>\n",
211
+ " <td>2022-06-22</td>\n",
212
+ " <td>nVZWipNVQFFoSi87tF0APXW_uRHl13wat_uR1c0iR6A</td>\n",
213
  " <td>{'name': 'Benzinga', 'homepage_url': 'https://...</td>\n",
214
+ " <td>Elon Musk Gives New Update On Tesla Cybertruck...</td>\n",
215
+ " <td>Chris Katje</td>\n",
216
+ " <td>https://www.benzinga.com/news/22/06/27820587/e...</td>\n",
217
+ " <td>[F, TSLA, RIVN]</td>\n",
218
+ " <td>https://www.benzinga.com/amp/content/27820587</td>\n",
219
  " <td>https://cdn.benzinga.com/files/images/story/20...</td>\n",
220
+ " <td>A recent interview between Tesla Inc (NASDAQ: ...</td>\n",
221
+ " <td>[News, Interview]</td>\n",
222
+ " <td>0.040</td>\n",
223
+ " <td>22:40:56</td>\n",
224
  " </tr>\n",
225
  " <tr>\n",
226
  " <th>1970-01-01 00:00:00.000000001</th>\n",
227
+ " <td>2022-06-22</td>\n",
228
+ " <td>x_9M1zYfZoHn_ptJM3USKxrodDtd4TcTsQSldXlViMw</td>\n",
229
+ " <td>{'name': 'MarketWatch', 'homepage_url': 'https...</td>\n",
230
+ " <td>Tesla's new factories are 'gigantic money furn...</td>\n",
231
+ " <td>MarketWatch</td>\n",
232
+ " <td>https://www.marketwatch.com/story/teslas-new-f...</td>\n",
233
+ " <td>[TSLA]</td>\n",
234
  " <td>No URL provided</td>\n",
235
+ " <td>https://images.mktw.net/im-569600/social</td>\n",
236
+ " <td>Tesla Inc.’s two newest car factories have bee...</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  " <td>No keywords</td>\n",
238
+ " <td>0.000</td>\n",
239
+ " <td>21:59:00</td>\n",
240
  " </tr>\n",
241
  " <tr>\n",
242
+ " <th>1970-01-01 00:00:00.000000002</th>\n",
243
+ " <td>2022-06-22</td>\n",
244
+ " <td>SDepdS_qKyhE6vqbR6Fathnn81fYDapZCk3DKT1Xpv4</td>\n",
245
  " <td>{'name': 'The Motley Fool', 'homepage_url': 'h...</td>\n",
246
+ " <td>Why Tesla Shares Jumped Initially, Then Retrea...</td>\n",
247
+ " <td>newsfeedback@fool.com (Chris Neiger)</td>\n",
248
+ " <td>https://www.fool.com/investing/2022/06/22/why-...</td>\n",
249
+ " <td>[TSLA]</td>\n",
250
  " <td>No URL provided</td>\n",
251
+ " <td>https://g.foolcdn.com/editorial/images/686400/...</td>\n",
252
+ " <td>Tesla's Shanghai plant may temporarily suspend...</td>\n",
253
  " <td>[investing]</td>\n",
254
+ " <td>0.000</td>\n",
255
+ " <td>19:33:04</td>\n",
256
+ " </tr>\n",
257
+ " <tr>\n",
258
+ " <th>1970-01-01 00:00:00.000000003</th>\n",
259
+ " <td>2022-06-22</td>\n",
260
+ " <td>xTSACaU2z-Mzqa23BiUeHTEUuY_jbVTWOYFkN3uszxg</td>\n",
261
+ " <td>{'name': 'MarketWatch', 'homepage_url': 'https...</td>\n",
262
+ " <td>These are the 10 used-car models whose prices ...</td>\n",
263
+ " <td>MarketWatch</td>\n",
264
+ " <td>https://www.marketwatch.com/story/these-are-th...</td>\n",
265
+ " <td>[LOTZ, TSLA]</td>\n",
266
+ " <td>https://www.marketwatch.com/amp/story/these-ar...</td>\n",
267
+ " <td>https://images.mktw.net/im-569120/social</td>\n",
268
+ " <td>Used-car prices rose on average 17% in May, wi...</td>\n",
269
+ " <td>No keywords</td>\n",
270
+ " <td>0.225</td>\n",
271
+ " <td>17:32:00</td>\n",
272
  " </tr>\n",
273
  " <tr>\n",
274
  " <th>1970-01-01 00:00:00.000000004</th>\n",
275
+ " <td>2022-06-22</td>\n",
276
+ " <td>pksGB2t-5ukDnjkRw_VYr9sfcSOExNmQiHzUq9_rBa0</td>\n",
277
+ " <td>{'name': 'Zacks Investment Research', 'homepag...</td>\n",
278
+ " <td>Tesla (TSLA) to Cut 10% Salaried Jobs, Raise H...</td>\n",
279
+ " <td>Zacks Equity Research</td>\n",
280
+ " <td>https://www.zacks.com/stock/news/1942395/tesla...</td>\n",
281
+ " <td>[SMP, TSLA, WNC, FOXF]</td>\n",
282
+ " <td>https://www.zacks.com/amp/stock/news/1942395/t...</td>\n",
283
+ " <td>https://staticx-tuner.zacks.com/images/article...</td>\n",
284
+ " <td>Tesla (TSLA) to lay off 10% of its salaried wo...</td>\n",
285
+ " <td>No keywords</td>\n",
286
+ " <td>0.000</td>\n",
287
+ " <td>15:58:00</td>\n",
288
  " </tr>\n",
289
  " </tbody>\n",
290
  "</table>\n",
 
292
  ],
293
  "text/plain": [
294
  " date \\\n",
295
+ "1970-01-01 00:00:00.000000000 2022-06-22 \n",
296
+ "1970-01-01 00:00:00.000000001 2022-06-22 \n",
297
+ "1970-01-01 00:00:00.000000002 2022-06-22 \n",
298
+ "1970-01-01 00:00:00.000000003 2022-06-22 \n",
299
+ "1970-01-01 00:00:00.000000004 2022-06-22 \n",
300
  "\n",
301
  " id \\\n",
302
+ "1970-01-01 00:00:00.000000000 nVZWipNVQFFoSi87tF0APXW_uRHl13wat_uR1c0iR6A \n",
303
+ "1970-01-01 00:00:00.000000001 x_9M1zYfZoHn_ptJM3USKxrodDtd4TcTsQSldXlViMw \n",
304
+ "1970-01-01 00:00:00.000000002 SDepdS_qKyhE6vqbR6Fathnn81fYDapZCk3DKT1Xpv4 \n",
305
+ "1970-01-01 00:00:00.000000003 xTSACaU2z-Mzqa23BiUeHTEUuY_jbVTWOYFkN3uszxg \n",
306
+ "1970-01-01 00:00:00.000000004 pksGB2t-5ukDnjkRw_VYr9sfcSOExNmQiHzUq9_rBa0 \n",
307
  "\n",
308
  " publisher \\\n",
309
  "1970-01-01 00:00:00.000000000 {'name': 'Benzinga', 'homepage_url': 'https://... \n",
310
+ "1970-01-01 00:00:00.000000001 {'name': 'MarketWatch', 'homepage_url': 'https... \n",
311
+ "1970-01-01 00:00:00.000000002 {'name': 'The Motley Fool', 'homepage_url': 'h... \n",
312
+ "1970-01-01 00:00:00.000000003 {'name': 'MarketWatch', 'homepage_url': 'https... \n",
313
+ "1970-01-01 00:00:00.000000004 {'name': 'Zacks Investment Research', 'homepag... \n",
314
  "\n",
315
  " title \\\n",
316
+ "1970-01-01 00:00:00.000000000 Elon Musk Gives New Update On Tesla Cybertruck... \n",
317
+ "1970-01-01 00:00:00.000000001 Tesla's new factories are 'gigantic money furn... \n",
318
+ "1970-01-01 00:00:00.000000002 Why Tesla Shares Jumped Initially, Then Retrea... \n",
319
+ "1970-01-01 00:00:00.000000003 These are the 10 used-car models whose prices ... \n",
320
+ "1970-01-01 00:00:00.000000004 Tesla (TSLA) to Cut 10% Salaried Jobs, Raise H... \n",
321
  "\n",
322
+ " author \\\n",
323
+ "1970-01-01 00:00:00.000000000 Chris Katje \n",
324
+ "1970-01-01 00:00:00.000000001 MarketWatch \n",
325
+ "1970-01-01 00:00:00.000000002 newsfeedback@fool.com (Chris Neiger) \n",
326
+ "1970-01-01 00:00:00.000000003 MarketWatch \n",
327
+ "1970-01-01 00:00:00.000000004 Zacks Equity Research \n",
328
  "\n",
329
  " article_url \\\n",
330
+ "1970-01-01 00:00:00.000000000 https://www.benzinga.com/news/22/06/27820587/e... \n",
331
+ "1970-01-01 00:00:00.000000001 https://www.marketwatch.com/story/teslas-new-f... \n",
332
+ "1970-01-01 00:00:00.000000002 https://www.fool.com/investing/2022/06/22/why-... \n",
333
+ "1970-01-01 00:00:00.000000003 https://www.marketwatch.com/story/these-are-th... \n",
334
+ "1970-01-01 00:00:00.000000004 https://www.zacks.com/stock/news/1942395/tesla... \n",
335
  "\n",
336
+ " tickers \\\n",
337
+ "1970-01-01 00:00:00.000000000 [F, TSLA, RIVN] \n",
338
+ "1970-01-01 00:00:00.000000001 [TSLA] \n",
339
+ "1970-01-01 00:00:00.000000002 [TSLA] \n",
340
+ "1970-01-01 00:00:00.000000003 [LOTZ, TSLA] \n",
341
+ "1970-01-01 00:00:00.000000004 [SMP, TSLA, WNC, FOXF] \n",
342
  "\n",
343
  " amp_url \\\n",
344
+ "1970-01-01 00:00:00.000000000 https://www.benzinga.com/amp/content/27820587 \n",
345
  "1970-01-01 00:00:00.000000001 No URL provided \n",
346
+ "1970-01-01 00:00:00.000000002 No URL provided \n",
347
+ "1970-01-01 00:00:00.000000003 https://www.marketwatch.com/amp/story/these-ar... \n",
348
+ "1970-01-01 00:00:00.000000004 https://www.zacks.com/amp/stock/news/1942395/t... \n",
349
  "\n",
350
  " image_url \\\n",
351
  "1970-01-01 00:00:00.000000000 https://cdn.benzinga.com/files/images/story/20... \n",
352
+ "1970-01-01 00:00:00.000000001 https://images.mktw.net/im-569600/social \n",
353
+ "1970-01-01 00:00:00.000000002 https://g.foolcdn.com/editorial/images/686400/... \n",
354
+ "1970-01-01 00:00:00.000000003 https://images.mktw.net/im-569120/social \n",
355
+ "1970-01-01 00:00:00.000000004 https://staticx-tuner.zacks.com/images/article... \n",
356
  "\n",
357
  " description \\\n",
358
+ "1970-01-01 00:00:00.000000000 A recent interview between Tesla Inc (NASDAQ: ... \n",
359
+ "1970-01-01 00:00:00.000000001 Tesla Inc.’s two newest car factories have bee... \n",
360
+ "1970-01-01 00:00:00.000000002 Tesla's Shanghai plant may temporarily suspend... \n",
361
+ "1970-01-01 00:00:00.000000003 Used-car prices rose on average 17% in May, wi... \n",
362
+ "1970-01-01 00:00:00.000000004 Tesla (TSLA) to lay off 10% of its salaried wo... \n",
363
+ "\n",
364
+ " keywords sentiment time \n",
365
+ "1970-01-01 00:00:00.000000000 [News, Interview] 0.040 22:40:56 \n",
366
+ "1970-01-01 00:00:00.000000001 No keywords 0.000 21:59:00 \n",
367
+ "1970-01-01 00:00:00.000000002 [investing] 0.000 19:33:04 \n",
368
+ "1970-01-01 00:00:00.000000003 No keywords 0.225 17:32:00 \n",
369
+ "1970-01-01 00:00:00.000000004 No keywords 0.000 15:58:00 "
 
 
 
 
 
 
 
370
  ]
371
  },
372
+ "execution_count": 19,
373
  "metadata": {},
374
  "output_type": "execute_result"
375
  }
 
380
  },
381
  {
382
  "cell_type": "code",
383
+ "execution_count": 20,
384
  "metadata": {},
385
  "outputs": [],
386
  "source": [
 
389
  },
390
  {
391
  "cell_type": "code",
392
+ "execution_count": 21,
393
  "metadata": {},
394
  "outputs": [],
395
  "source": [
 
398
  },
399
  {
400
  "cell_type": "code",
401
+ "execution_count": 22,
402
  "metadata": {},
403
  "outputs": [],
404
  "source": [
 
407
  },
408
  {
409
  "cell_type": "code",
410
+ "execution_count": 23,
411
  "metadata": {},
412
  "outputs": [],
413
  "source": [
 
416
  },
417
  {
418
  "cell_type": "code",
419
+ "execution_count": 24,
420
  "metadata": {},
421
  "outputs": [
422
  {
 
458
  " </thead>\n",
459
  " <tbody>\n",
460
  " <tr>\n",
461
+ " <th>1970-01-01 00:00:00.000000719</th>\n",
462
+ " <td>2024-04-29</td>\n",
463
+ " <td>MeWGIZiKn6J3JCwWAkHNqVv6Cc9HToUK-HmodQSesdM</td>\n",
464
+ " <td>{'name': 'The Motley Fool', 'homepage_url': 'h...</td>\n",
465
+ " <td>Why Baidu Stock Jumped Today</td>\n",
466
+ " <td>newsfeedback@fool.com (James Brumley)</td>\n",
467
+ " <td>https://www.fool.com/investing/2024/04/29/why-...</td>\n",
468
+ " <td>[BIDU, GOOGL, TSLA, GOOG, IQ]</td>\n",
469
+ " <td>No URL provided</td>\n",
470
+ " <td>https://g.foolcdn.com/editorial/images/774939/...</td>\n",
471
+ " <td>It's getting difficult not to notice how impre...</td>\n",
472
+ " <td>[investing]</td>\n",
473
+ " <td>0.250000</td>\n",
474
+ " <td>21:26:09</td>\n",
475
+ " <td>0.250000</td>\n",
476
+ " </tr>\n",
477
+ " <tr>\n",
478
+ " <th>1970-01-01 00:00:00.000000718</th>\n",
479
+ " <td>2024-04-29</td>\n",
480
+ " <td>T9MgJwXEmlRjWpkmLvcwnBggkbeXWWoGzFISY65WwBc</td>\n",
481
+ " <td>{'name': 'Zacks Investment Research', 'homepag...</td>\n",
482
+ " <td>Markets Up on Tesla, Q1 Earnings; Q1 Beats Aft...</td>\n",
483
+ " <td>Mark Vickery</td>\n",
484
+ " <td>https://www.zacks.com/stock/news/2264549/marke...</td>\n",
485
+ " <td>[AMZN, AMD, KO, LLY, SBUX, MCD, NXPI, TSLA, YU...</td>\n",
486
+ " <td>https://www.zacks.com/amp/stock/news/2264549/m...</td>\n",
487
+ " <td>https://staticx-tuner.zacks.com/images/article...</td>\n",
488
+ " <td>It's now the third straight day higher going b...</td>\n",
489
+ " <td>No keywords</td>\n",
490
+ " <td>0.111905</td>\n",
491
+ " <td>22:10:00</td>\n",
492
+ " <td>0.215476</td>\n",
493
+ " </tr>\n",
494
+ " <tr>\n",
495
+ " <th>1970-01-01 00:00:00.000000717</th>\n",
496
+ " <td>2024-04-30</td>\n",
497
+ " <td>xaUyg2qUKK7h_EDkKruXR9KdY_drlcXLai14uHvZTsc</td>\n",
498
+ " <td>{'name': 'Seeking Alpha', 'homepage_url': 'htt...</td>\n",
499
+ " <td>Big Tech Earnings Beats Stymie Q2 2024 Sell-Off</td>\n",
500
+ " <td>Christine Short</td>\n",
501
+ " <td>https://seekingalpha.com/article/4687390-big-t...</td>\n",
502
+ " <td>[AAPL, AMZN, GOOG, GOOGL, HSY, KO, META, MMM, ...</td>\n",
503
+ " <td>No URL provided</td>\n",
504
+ " <td>https://static.seekingalpha.com/cdn/s3/uploads...</td>\n",
505
+ " <td>Markets finally turned positive late last week...</td>\n",
506
+ " <td>No keywords</td>\n",
507
+ " <td>-0.032955</td>\n",
508
+ " <td>04:30:00</td>\n",
509
+ " <td>0.153369</td>\n",
510
+ " </tr>\n",
511
+ " <tr>\n",
512
+ " <th>1970-01-01 00:00:00.000000716</th>\n",
513
+ " <td>2024-04-30</td>\n",
514
+ " <td>IPVxhBMbT73GJJHLQZYPFb8yQpAxzbEuXrJk0dMSt8U</td>\n",
515
+ " <td>{'name': 'Zacks Investment Research', 'homepag...</td>\n",
516
+ " <td>Stock Market News for Apr 30, 2024</td>\n",
517
+ " <td>Zacks Equity Research</td>\n",
518
+ " <td>https://www.zacks.com/stock/news/2264591/stock...</td>\n",
519
+ " <td>[AAPL, TSLA, PARA]</td>\n",
520
+ " <td>https://www.zacks.com/amp/stock/news/2264591/s...</td>\n",
521
+ " <td>https://staticx-tuner.zacks.com/images/article...</td>\n",
522
+ " <td>Wall Street closed higher on Monday to start a...</td>\n",
523
+ " <td>No keywords</td>\n",
524
+ " <td>0.050000</td>\n",
525
+ " <td>07:27:00</td>\n",
526
+ " <td>0.127526</td>\n",
527
+ " </tr>\n",
528
+ " <tr>\n",
529
+ " <th>1970-01-01 00:00:00.000000715</th>\n",
530
+ " <td>2024-04-30</td>\n",
531
+ " <td>6pQAGkGEZvAd76QYnk6aAhhUCVLrUkdOjgnon-ALmsQ</td>\n",
532
  " <td>{'name': 'Benzinga', 'homepage_url': 'https://...</td>\n",
533
+ " <td>'Tesla Has The Endorsement' Of Xi Jinping's Go...</td>\n",
534
+ " <td>Benzinga Neuro</td>\n",
535
  " <td>https://www.benzinga.com/analyst-ratings/analy...</td>\n",
536
  " <td>[TSLA]</td>\n",
537
+ " <td>https://www.benzinga.com/amp/content/38511044</td>\n",
538
  " <td>https://cdn.benzinga.com/files/images/story/20...</td>\n",
539
+ " <td>Tim Higgins, author of “Power Play: Tesla, Elo...</td>\n",
540
+ " <td>[News, Analyst Color, Tech, General]</td>\n",
541
+ " <td>0.008333</td>\n",
542
+ " <td>07:42:58</td>\n",
543
+ " <td>0.097728</td>\n",
544
  " </tr>\n",
545
+ " </tbody>\n",
546
+ "</table>\n",
547
+ "</div>"
548
+ ],
549
+ "text/plain": [
550
+ " date \\\n",
551
+ "1970-01-01 00:00:00.000000719 2024-04-29 \n",
552
+ "1970-01-01 00:00:00.000000718 2024-04-29 \n",
553
+ "1970-01-01 00:00:00.000000717 2024-04-30 \n",
554
+ "1970-01-01 00:00:00.000000716 2024-04-30 \n",
555
+ "1970-01-01 00:00:00.000000715 2024-04-30 \n",
556
+ "\n",
557
+ " id \\\n",
558
+ "1970-01-01 00:00:00.000000719 MeWGIZiKn6J3JCwWAkHNqVv6Cc9HToUK-HmodQSesdM \n",
559
+ "1970-01-01 00:00:00.000000718 T9MgJwXEmlRjWpkmLvcwnBggkbeXWWoGzFISY65WwBc \n",
560
+ "1970-01-01 00:00:00.000000717 xaUyg2qUKK7h_EDkKruXR9KdY_drlcXLai14uHvZTsc \n",
561
+ "1970-01-01 00:00:00.000000716 IPVxhBMbT73GJJHLQZYPFb8yQpAxzbEuXrJk0dMSt8U \n",
562
+ "1970-01-01 00:00:00.000000715 6pQAGkGEZvAd76QYnk6aAhhUCVLrUkdOjgnon-ALmsQ \n",
563
+ "\n",
564
+ " publisher \\\n",
565
+ "1970-01-01 00:00:00.000000719 {'name': 'The Motley Fool', 'homepage_url': 'h... \n",
566
+ "1970-01-01 00:00:00.000000718 {'name': 'Zacks Investment Research', 'homepag... \n",
567
+ "1970-01-01 00:00:00.000000717 {'name': 'Seeking Alpha', 'homepage_url': 'htt... \n",
568
+ "1970-01-01 00:00:00.000000716 {'name': 'Zacks Investment Research', 'homepag... \n",
569
+ "1970-01-01 00:00:00.000000715 {'name': 'Benzinga', 'homepage_url': 'https://... \n",
570
+ "\n",
571
+ " title \\\n",
572
+ "1970-01-01 00:00:00.000000719 Why Baidu Stock Jumped Today \n",
573
+ "1970-01-01 00:00:00.000000718 Markets Up on Tesla, Q1 Earnings; Q1 Beats Aft... \n",
574
+ "1970-01-01 00:00:00.000000717 Big Tech Earnings Beats Stymie Q2 2024 Sell-Off \n",
575
+ "1970-01-01 00:00:00.000000716 Stock Market News for Apr 30, 2024 \n",
576
+ "1970-01-01 00:00:00.000000715 'Tesla Has The Endorsement' Of Xi Jinping's Go... \n",
577
+ "\n",
578
+ " author \\\n",
579
+ "1970-01-01 00:00:00.000000719 newsfeedback@fool.com (James Brumley) \n",
580
+ "1970-01-01 00:00:00.000000718 Mark Vickery \n",
581
+ "1970-01-01 00:00:00.000000717 Christine Short \n",
582
+ "1970-01-01 00:00:00.000000716 Zacks Equity Research \n",
583
+ "1970-01-01 00:00:00.000000715 Benzinga Neuro \n",
584
+ "\n",
585
+ " article_url \\\n",
586
+ "1970-01-01 00:00:00.000000719 https://www.fool.com/investing/2024/04/29/why-... \n",
587
+ "1970-01-01 00:00:00.000000718 https://www.zacks.com/stock/news/2264549/marke... \n",
588
+ "1970-01-01 00:00:00.000000717 https://seekingalpha.com/article/4687390-big-t... \n",
589
+ "1970-01-01 00:00:00.000000716 https://www.zacks.com/stock/news/2264591/stock... \n",
590
+ "1970-01-01 00:00:00.000000715 https://www.benzinga.com/analyst-ratings/analy... \n",
591
+ "\n",
592
+ " tickers \\\n",
593
+ "1970-01-01 00:00:00.000000719 [BIDU, GOOGL, TSLA, GOOG, IQ] \n",
594
+ "1970-01-01 00:00:00.000000718 [AMZN, AMD, KO, LLY, SBUX, MCD, NXPI, TSLA, YU... \n",
595
+ "1970-01-01 00:00:00.000000717 [AAPL, AMZN, GOOG, GOOGL, HSY, KO, META, MMM, ... \n",
596
+ "1970-01-01 00:00:00.000000716 [AAPL, TSLA, PARA] \n",
597
+ "1970-01-01 00:00:00.000000715 [TSLA] \n",
598
+ "\n",
599
+ " amp_url \\\n",
600
+ "1970-01-01 00:00:00.000000719 No URL provided \n",
601
+ "1970-01-01 00:00:00.000000718 https://www.zacks.com/amp/stock/news/2264549/m... \n",
602
+ "1970-01-01 00:00:00.000000717 No URL provided \n",
603
+ "1970-01-01 00:00:00.000000716 https://www.zacks.com/amp/stock/news/2264591/s... \n",
604
+ "1970-01-01 00:00:00.000000715 https://www.benzinga.com/amp/content/38511044 \n",
605
+ "\n",
606
+ " image_url \\\n",
607
+ "1970-01-01 00:00:00.000000719 https://g.foolcdn.com/editorial/images/774939/... \n",
608
+ "1970-01-01 00:00:00.000000718 https://staticx-tuner.zacks.com/images/article... \n",
609
+ "1970-01-01 00:00:00.000000717 https://static.seekingalpha.com/cdn/s3/uploads... \n",
610
+ "1970-01-01 00:00:00.000000716 https://staticx-tuner.zacks.com/images/article... \n",
611
+ "1970-01-01 00:00:00.000000715 https://cdn.benzinga.com/files/images/story/20... \n",
612
+ "\n",
613
+ " description \\\n",
614
+ "1970-01-01 00:00:00.000000719 It's getting difficult not to notice how impre... \n",
615
+ "1970-01-01 00:00:00.000000718 It's now the third straight day higher going b... \n",
616
+ "1970-01-01 00:00:00.000000717 Markets finally turned positive late last week... \n",
617
+ "1970-01-01 00:00:00.000000716 Wall Street closed higher on Monday to start a... \n",
618
+ "1970-01-01 00:00:00.000000715 Tim Higgins, author of “Power Play: Tesla, Elo... \n",
619
+ "\n",
620
+ " keywords \\\n",
621
+ "1970-01-01 00:00:00.000000719 [investing] \n",
622
+ "1970-01-01 00:00:00.000000718 No keywords \n",
623
+ "1970-01-01 00:00:00.000000717 No keywords \n",
624
+ "1970-01-01 00:00:00.000000716 No keywords \n",
625
+ "1970-01-01 00:00:00.000000715 [News, Analyst Color, Tech, General] \n",
626
+ "\n",
627
+ " sentiment time exp_mean_7_days \n",
628
+ "1970-01-01 00:00:00.000000719 0.250000 21:26:09 0.250000 \n",
629
+ "1970-01-01 00:00:00.000000718 0.111905 22:10:00 0.215476 \n",
630
+ "1970-01-01 00:00:00.000000717 -0.032955 04:30:00 0.153369 \n",
631
+ "1970-01-01 00:00:00.000000716 0.050000 07:27:00 0.127526 \n",
632
+ "1970-01-01 00:00:00.000000715 0.008333 07:42:58 0.097728 "
633
+ ]
634
+ },
635
+ "execution_count": 24,
636
+ "metadata": {},
637
+ "output_type": "execute_result"
638
+ }
639
+ ],
640
+ "source": [
641
+ "df_processed.head()"
642
+ ]
643
+ },
644
+ {
645
+ "cell_type": "code",
646
+ "execution_count": 25,
647
+ "metadata": {},
648
+ "outputs": [
649
+ {
650
+ "data": {
651
+ "text/html": [
652
+ "<div>\n",
653
+ "<style scoped>\n",
654
+ " .dataframe tbody tr th:only-of-type {\n",
655
+ " vertical-align: middle;\n",
656
+ " }\n",
657
+ "\n",
658
+ " .dataframe tbody tr th {\n",
659
+ " vertical-align: top;\n",
660
+ " }\n",
661
+ "\n",
662
+ " .dataframe thead th {\n",
663
+ " text-align: right;\n",
664
+ " }\n",
665
+ "</style>\n",
666
+ "<table border=\"1\" class=\"dataframe\">\n",
667
+ " <thead>\n",
668
+ " <tr style=\"text-align: right;\">\n",
669
+ " <th></th>\n",
670
+ " <th>date</th>\n",
671
+ " <th>id</th>\n",
672
+ " <th>publisher</th>\n",
673
+ " <th>title</th>\n",
674
+ " <th>author</th>\n",
675
+ " <th>article_url</th>\n",
676
+ " <th>tickers</th>\n",
677
+ " <th>amp_url</th>\n",
678
+ " <th>image_url</th>\n",
679
+ " <th>description</th>\n",
680
+ " <th>keywords</th>\n",
681
+ " <th>sentiment</th>\n",
682
+ " <th>time</th>\n",
683
+ " <th>exp_mean_7_days</th>\n",
684
+ " </tr>\n",
685
+ " </thead>\n",
686
+ " <tbody>\n",
687
  " <tr>\n",
688
+ " <th>1970-01-01 00:00:00.000000004</th>\n",
689
+ " <td>2022-06-22</td>\n",
690
+ " <td>pksGB2t-5ukDnjkRw_VYr9sfcSOExNmQiHzUq9_rBa0</td>\n",
691
+ " <td>{'name': 'Zacks Investment Research', 'homepag...</td>\n",
692
+ " <td>Tesla (TSLA) to Cut 10% Salaried Jobs, Raise H...</td>\n",
693
+ " <td>Zacks Equity Research</td>\n",
694
+ " <td>https://www.zacks.com/stock/news/1942395/tesla...</td>\n",
695
+ " <td>[SMP, TSLA, WNC, FOXF]</td>\n",
696
+ " <td>https://www.zacks.com/amp/stock/news/1942395/t...</td>\n",
697
+ " <td>https://staticx-tuner.zacks.com/images/article...</td>\n",
698
+ " <td>Tesla (TSLA) to lay off 10% of its salaried wo...</td>\n",
699
+ " <td>No keywords</td>\n",
700
+ " <td>0.000</td>\n",
701
+ " <td>15:58:00</td>\n",
702
+ " <td>0.195010</td>\n",
703
  " </tr>\n",
704
  " <tr>\n",
705
+ " <th>1970-01-01 00:00:00.000000003</th>\n",
706
+ " <td>2022-06-22</td>\n",
707
+ " <td>xTSACaU2z-Mzqa23BiUeHTEUuY_jbVTWOYFkN3uszxg</td>\n",
708
+ " <td>{'name': 'MarketWatch', 'homepage_url': 'https...</td>\n",
709
+ " <td>These are the 10 used-car models whose prices ...</td>\n",
710
+ " <td>MarketWatch</td>\n",
711
+ " <td>https://www.marketwatch.com/story/these-are-th...</td>\n",
712
+ " <td>[LOTZ, TSLA]</td>\n",
713
+ " <td>https://www.marketwatch.com/amp/story/these-ar...</td>\n",
714
+ " <td>https://images.mktw.net/im-569120/social</td>\n",
715
+ " <td>Used-car prices rose on average 17% in May, wi...</td>\n",
716
+ " <td>No keywords</td>\n",
717
+ " <td>0.225</td>\n",
718
+ " <td>17:32:00</td>\n",
719
+ " <td>0.202508</td>\n",
720
  " </tr>\n",
721
  " <tr>\n",
722
+ " <th>1970-01-01 00:00:00.000000002</th>\n",
723
+ " <td>2022-06-22</td>\n",
724
+ " <td>SDepdS_qKyhE6vqbR6Fathnn81fYDapZCk3DKT1Xpv4</td>\n",
725
  " <td>{'name': 'The Motley Fool', 'homepage_url': 'h...</td>\n",
726
+ " <td>Why Tesla Shares Jumped Initially, Then Retrea...</td>\n",
727
+ " <td>newsfeedback@fool.com (Chris Neiger)</td>\n",
728
+ " <td>https://www.fool.com/investing/2022/06/22/why-...</td>\n",
729
+ " <td>[TSLA]</td>\n",
730
  " <td>No URL provided</td>\n",
731
+ " <td>https://g.foolcdn.com/editorial/images/686400/...</td>\n",
732
+ " <td>Tesla's Shanghai plant may temporarily suspend...</td>\n",
733
  " <td>[investing]</td>\n",
734
+ " <td>0.000</td>\n",
735
+ " <td>19:33:04</td>\n",
736
+ " <td>0.151881</td>\n",
737
  " </tr>\n",
738
  " <tr>\n",
739
+ " <th>1970-01-01 00:00:00.000000001</th>\n",
740
+ " <td>2022-06-22</td>\n",
741
+ " <td>x_9M1zYfZoHn_ptJM3USKxrodDtd4TcTsQSldXlViMw</td>\n",
742
+ " <td>{'name': 'MarketWatch', 'homepage_url': 'https...</td>\n",
743
+ " <td>Tesla's new factories are 'gigantic money furn...</td>\n",
744
+ " <td>MarketWatch</td>\n",
745
+ " <td>https://www.marketwatch.com/story/teslas-new-f...</td>\n",
746
+ " <td>[TSLA]</td>\n",
747
  " <td>No URL provided</td>\n",
748
+ " <td>https://images.mktw.net/im-569600/social</td>\n",
749
+ " <td>Tesla Inc.’s two newest car factories have bee...</td>\n",
750
+ " <td>No keywords</td>\n",
751
+ " <td>0.000</td>\n",
752
+ " <td>21:59:00</td>\n",
753
+ " <td>0.113910</td>\n",
754
+ " </tr>\n",
755
+ " <tr>\n",
756
+ " <th>1970-01-01 00:00:00.000000000</th>\n",
757
+ " <td>2022-06-22</td>\n",
758
+ " <td>nVZWipNVQFFoSi87tF0APXW_uRHl13wat_uR1c0iR6A</td>\n",
759
+ " <td>{'name': 'Benzinga', 'homepage_url': 'https://...</td>\n",
760
+ " <td>Elon Musk Gives New Update On Tesla Cybertruck...</td>\n",
761
+ " <td>Chris Katje</td>\n",
762
+ " <td>https://www.benzinga.com/news/22/06/27820587/e...</td>\n",
763
+ " <td>[F, TSLA, RIVN]</td>\n",
764
+ " <td>https://www.benzinga.com/amp/content/27820587</td>\n",
765
+ " <td>https://cdn.benzinga.com/files/images/story/20...</td>\n",
766
+ " <td>A recent interview between Tesla Inc (NASDAQ: ...</td>\n",
767
+ " <td>[News, Interview]</td>\n",
768
+ " <td>0.040</td>\n",
769
+ " <td>22:40:56</td>\n",
770
+ " <td>0.095433</td>\n",
771
  " </tr>\n",
772
  " </tbody>\n",
773
  "</table>\n",
 
775
  ],
776
  "text/plain": [
777
  " date \\\n",
778
+ "1970-01-01 00:00:00.000000004 2022-06-22 \n",
779
+ "1970-01-01 00:00:00.000000003 2022-06-22 \n",
780
+ "1970-01-01 00:00:00.000000002 2022-06-22 \n",
781
+ "1970-01-01 00:00:00.000000001 2022-06-22 \n",
782
+ "1970-01-01 00:00:00.000000000 2022-06-22 \n",
783
  "\n",
784
  " id \\\n",
785
+ "1970-01-01 00:00:00.000000004 pksGB2t-5ukDnjkRw_VYr9sfcSOExNmQiHzUq9_rBa0 \n",
786
+ "1970-01-01 00:00:00.000000003 xTSACaU2z-Mzqa23BiUeHTEUuY_jbVTWOYFkN3uszxg \n",
787
+ "1970-01-01 00:00:00.000000002 SDepdS_qKyhE6vqbR6Fathnn81fYDapZCk3DKT1Xpv4 \n",
788
+ "1970-01-01 00:00:00.000000001 x_9M1zYfZoHn_ptJM3USKxrodDtd4TcTsQSldXlViMw \n",
789
+ "1970-01-01 00:00:00.000000000 nVZWipNVQFFoSi87tF0APXW_uRHl13wat_uR1c0iR6A \n",
790
  "\n",
791
  " publisher \\\n",
792
+ "1970-01-01 00:00:00.000000004 {'name': 'Zacks Investment Research', 'homepag... \n",
793
+ "1970-01-01 00:00:00.000000003 {'name': 'MarketWatch', 'homepage_url': 'https... \n",
794
+ "1970-01-01 00:00:00.000000002 {'name': 'The Motley Fool', 'homepage_url': 'h... \n",
795
+ "1970-01-01 00:00:00.000000001 {'name': 'MarketWatch', 'homepage_url': 'https... \n",
796
+ "1970-01-01 00:00:00.000000000 {'name': 'Benzinga', 'homepage_url': 'https://... \n",
797
  "\n",
798
  " title \\\n",
799
+ "1970-01-01 00:00:00.000000004 Tesla (TSLA) to Cut 10% Salaried Jobs, Raise H... \n",
800
+ "1970-01-01 00:00:00.000000003 These are the 10 used-car models whose prices ... \n",
801
+ "1970-01-01 00:00:00.000000002 Why Tesla Shares Jumped Initially, Then Retrea... \n",
802
+ "1970-01-01 00:00:00.000000001 Tesla's new factories are 'gigantic money furn... \n",
803
+ "1970-01-01 00:00:00.000000000 Elon Musk Gives New Update On Tesla Cybertruck... \n",
804
+ "\n",
805
+ " author \\\n",
806
+ "1970-01-01 00:00:00.000000004 Zacks Equity Research \n",
807
+ "1970-01-01 00:00:00.000000003 MarketWatch \n",
808
+ "1970-01-01 00:00:00.000000002 newsfeedback@fool.com (Chris Neiger) \n",
809
+ "1970-01-01 00:00:00.000000001 MarketWatch \n",
810
+ "1970-01-01 00:00:00.000000000 Chris Katje \n",
811
  "\n",
812
  " article_url \\\n",
813
+ "1970-01-01 00:00:00.000000004 https://www.zacks.com/stock/news/1942395/tesla... \n",
814
+ "1970-01-01 00:00:00.000000003 https://www.marketwatch.com/story/these-are-th... \n",
815
+ "1970-01-01 00:00:00.000000002 https://www.fool.com/investing/2022/06/22/why-... \n",
816
+ "1970-01-01 00:00:00.000000001 https://www.marketwatch.com/story/teslas-new-f... \n",
817
+ "1970-01-01 00:00:00.000000000 https://www.benzinga.com/news/22/06/27820587/e... \n",
818
  "\n",
819
+ " tickers \\\n",
820
+ "1970-01-01 00:00:00.000000004 [SMP, TSLA, WNC, FOXF] \n",
821
+ "1970-01-01 00:00:00.000000003 [LOTZ, TSLA] \n",
822
+ "1970-01-01 00:00:00.000000002 [TSLA] \n",
823
+ "1970-01-01 00:00:00.000000001 [TSLA] \n",
824
+ "1970-01-01 00:00:00.000000000 [F, TSLA, RIVN] \n",
825
+ "\n",
826
+ " amp_url \\\n",
827
+ "1970-01-01 00:00:00.000000004 https://www.zacks.com/amp/stock/news/1942395/t... \n",
828
+ "1970-01-01 00:00:00.000000003 https://www.marketwatch.com/amp/story/these-ar... \n",
829
+ "1970-01-01 00:00:00.000000002 No URL provided \n",
830
+ "1970-01-01 00:00:00.000000001 No URL provided \n",
831
+ "1970-01-01 00:00:00.000000000 https://www.benzinga.com/amp/content/27820587 \n",
832
  "\n",
833
  " image_url \\\n",
834
+ "1970-01-01 00:00:00.000000004 https://staticx-tuner.zacks.com/images/article... \n",
835
+ "1970-01-01 00:00:00.000000003 https://images.mktw.net/im-569120/social \n",
836
+ "1970-01-01 00:00:00.000000002 https://g.foolcdn.com/editorial/images/686400/... \n",
837
+ "1970-01-01 00:00:00.000000001 https://images.mktw.net/im-569600/social \n",
838
+ "1970-01-01 00:00:00.000000000 https://cdn.benzinga.com/files/images/story/20... \n",
839
  "\n",
840
  " description \\\n",
841
+ "1970-01-01 00:00:00.000000004 Tesla (TSLA) to lay off 10% of its salaried wo... \n",
842
+ "1970-01-01 00:00:00.000000003 Used-car prices rose on average 17% in May, wi... \n",
843
+ "1970-01-01 00:00:00.000000002 Tesla's Shanghai plant may temporarily suspend... \n",
844
+ "1970-01-01 00:00:00.000000001 Tesla Inc.’s two newest car factories have bee... \n",
845
+ "1970-01-01 00:00:00.000000000 A recent interview between Tesla Inc (NASDAQ: ... \n",
 
 
 
 
 
 
 
846
  "\n",
847
+ " keywords sentiment time \\\n",
848
+ "1970-01-01 00:00:00.000000004 No keywords 0.000 15:58:00 \n",
849
+ "1970-01-01 00:00:00.000000003 No keywords 0.225 17:32:00 \n",
850
+ "1970-01-01 00:00:00.000000002 [investing] 0.000 19:33:04 \n",
851
+ "1970-01-01 00:00:00.000000001 No keywords 0.000 21:59:00 \n",
852
+ "1970-01-01 00:00:00.000000000 [News, Interview] 0.040 22:40:56 \n",
853
+ "\n",
854
+ " exp_mean_7_days \n",
855
+ "1970-01-01 00:00:00.000000004 0.195010 \n",
856
+ "1970-01-01 00:00:00.000000003 0.202508 \n",
857
+ "1970-01-01 00:00:00.000000002 0.151881 \n",
858
+ "1970-01-01 00:00:00.000000001 0.113910 \n",
859
+ "1970-01-01 00:00:00.000000000 0.095433 "
860
  ]
861
  },
862
+ "execution_count": 25,
863
  "metadata": {},
864
  "output_type": "execute_result"
865
  }
866
  ],
867
  "source": [
868
+ "df_processed.tail()"
869
+ ]
870
+ },
871
+ {
872
+ "cell_type": "code",
873
+ "execution_count": 27,
874
+ "metadata": {},
875
+ "outputs": [
876
+ {
877
+ "name": "stdout",
878
+ "output_type": "stream",
879
+ "text": [
880
+ "2022-06-20\n",
881
+ "2024-05-02\n"
882
+ ]
883
+ }
884
+ ],
885
+ "source": [
886
+ "print(df_processed['date'].min())\n",
887
+ "print(df_processed['date'].max())"
888
+ ]
889
+ },
890
+ {
891
+ "cell_type": "code",
892
+ "execution_count": 28,
893
+ "metadata": {},
894
+ "outputs": [
895
+ {
896
+ "name": "stdout",
897
+ "output_type": "stream",
898
+ "text": [
899
+ "682 days, 0:00:00\n"
900
+ ]
901
+ }
902
+ ],
903
+ "source": [
904
+ "print(df_processed['date'].max() - df_processed['date'].min()) "
905
+ ]
906
+ },
907
+ {
908
+ "cell_type": "code",
909
+ "execution_count": 29,
910
+ "metadata": {},
911
+ "outputs": [
912
+ {
913
+ "data": {
914
+ "text/plain": [
915
+ "(720, 14)"
916
+ ]
917
+ },
918
+ "execution_count": 29,
919
+ "metadata": {},
920
+ "output_type": "execute_result"
921
+ }
922
+ ],
923
+ "source": [
924
+ "df_processed.shape"
925
+ ]
926
+ },
927
+ {
928
+ "cell_type": "code",
929
+ "execution_count": 30,
930
+ "metadata": {},
931
+ "outputs": [],
932
+ "source": [
933
+ "duplicates = df_processed[df_processed.duplicated('date')]"
934
+ ]
935
+ },
936
+ {
937
+ "cell_type": "code",
938
+ "execution_count": 31,
939
+ "metadata": {},
940
+ "outputs": [
941
+ {
942
+ "data": {
943
+ "text/plain": [
944
+ "(657, 14)"
945
+ ]
946
+ },
947
+ "execution_count": 31,
948
+ "metadata": {},
949
+ "output_type": "execute_result"
950
+ }
951
+ ],
952
+ "source": [
953
+ "duplicates.shape"
954
  ]
955
  },
956
  {
historical_stock.ipynb CHANGED
@@ -192,7 +192,7 @@
192
  "outputs": [],
193
  "source": [
194
  "# Define your file path and name\n",
195
- "file_path = '/Users/manos/Documents/BDS/MLops_mod/TSLA_stock_price.csv' # Customize the path and filename\n",
196
  "\n",
197
  "# Save the DataFrame to CSV\n",
198
  "data.to_csv(file_path)\n",
 
192
  "outputs": [],
193
  "source": [
194
  "# Define your file path and name\n",
195
+ "file_path = 'TSLA_stock_price.csv' # Customize the path and filename\n",
196
  "\n",
197
  "# Save the DataFrame to CSV\n",
198
  "data.to_csv(file_path)\n",
stock_preprocessing.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -20,7 +20,7 @@
20
  "import pandas_market_calendars as mcal\n",
21
  "import datetime\n",
22
  "import numpy as np\n",
23
- "from datetime import timedelta "
24
  ]
25
  },
26
  {
@@ -34,11 +34,11 @@
34
  "text": [
35
  " 1. open 2. high 3. low 4. close 5. volume\n",
36
  "date \n",
 
37
  "2024-05-01 182.00 185.86 179.0100 179.99 92829719.0\n",
38
  "2024-04-30 186.98 190.95 182.8401 183.28 127031787.0\n",
39
  "2024-04-29 188.42 198.87 184.5400 194.05 243869678.0\n",
40
- "2024-04-26 168.85 172.12 166.3700 168.29 109815725.0\n",
41
- "2024-04-25 158.96 170.88 158.3600 170.18 126427521.0\n"
42
  ]
43
  }
44
  ],
@@ -56,7 +56,7 @@
56
  },
57
  {
58
  "cell_type": "code",
59
- "execution_count": 4,
60
  "metadata": {},
61
  "outputs": [
62
  {
@@ -97,6 +97,14 @@
97
  " </thead>\n",
98
  " <tbody>\n",
99
  " <tr>\n",
 
 
 
 
 
 
 
 
100
  " <th>2024-05-01</th>\n",
101
  " <td>182.00</td>\n",
102
  " <td>185.8600</td>\n",
@@ -129,14 +137,6 @@
129
  " <td>109815725.0</td>\n",
130
  " </tr>\n",
131
  " <tr>\n",
132
- " <th>2024-04-25</th>\n",
133
- " <td>158.96</td>\n",
134
- " <td>170.8800</td>\n",
135
- " <td>158.3600</td>\n",
136
- " <td>170.18</td>\n",
137
- " <td>126427521.0</td>\n",
138
- " </tr>\n",
139
- " <tr>\n",
140
  " <th>...</th>\n",
141
  " <td>...</td>\n",
142
  " <td>...</td>\n",
@@ -186,17 +186,17 @@
186
  " </tr>\n",
187
  " </tbody>\n",
188
  "</table>\n",
189
- "<p>3484 rows × 5 columns</p>\n",
190
  "</div>"
191
  ],
192
  "text/plain": [
193
  " 1. open 2. high 3. low 4. close 5. volume\n",
194
  "date \n",
 
195
  "2024-05-01 182.00 185.8600 179.0100 179.99 92829719.0\n",
196
  "2024-04-30 186.98 190.9500 182.8401 183.28 127031787.0\n",
197
  "2024-04-29 188.42 198.8700 184.5400 194.05 243869678.0\n",
198
  "2024-04-26 168.85 172.1200 166.3700 168.29 109815725.0\n",
199
- "2024-04-25 158.96 170.8800 158.3600 170.18 126427521.0\n",
200
  "... ... ... ... ... ...\n",
201
  "2010-07-06 20.00 20.0000 15.8300 16.11 6866900.0\n",
202
  "2010-07-02 23.00 23.1000 18.7100 19.20 5139800.0\n",
@@ -204,10 +204,10 @@
204
  "2010-06-30 25.79 30.4192 23.3000 23.83 17187100.0\n",
205
  "2010-06-29 19.00 25.0000 17.5400 23.89 18766300.0\n",
206
  "\n",
207
- "[3484 rows x 5 columns]"
208
  ]
209
  },
210
- "execution_count": 4,
211
  "metadata": {},
212
  "output_type": "execute_result"
213
  }
@@ -218,7 +218,7 @@
218
  },
219
  {
220
  "cell_type": "code",
221
- "execution_count": 5,
222
  "metadata": {},
223
  "outputs": [
224
  {
@@ -226,17 +226,17 @@
226
  "output_type": "stream",
227
  "text": [
228
  "<class 'pandas.core.frame.DataFrame'>\n",
229
- "DatetimeIndex: 3484 entries, 2024-05-01 to 2010-06-29\n",
230
  "Data columns (total 5 columns):\n",
231
  " # Column Non-Null Count Dtype \n",
232
  "--- ------ -------------- ----- \n",
233
- " 0 1. open 3484 non-null float64\n",
234
- " 1 2. high 3484 non-null float64\n",
235
- " 2 3. low 3484 non-null float64\n",
236
- " 3 4. close 3484 non-null float64\n",
237
- " 4 5. volume 3484 non-null float64\n",
238
  "dtypes: float64(5)\n",
239
- "memory usage: 163.3 KB\n"
240
  ]
241
  }
242
  ],
@@ -246,7 +246,7 @@
246
  },
247
  {
248
  "cell_type": "code",
249
- "execution_count": 6,
250
  "metadata": {},
251
  "outputs": [
252
  {
@@ -254,12 +254,12 @@
254
  "text/plain": [
255
  "{'1. Information': 'Daily Prices (open, high, low, close) and Volumes',\n",
256
  " '2. Symbol': 'TSLA',\n",
257
- " '3. Last Refreshed': '2024-05-01',\n",
258
  " '4. Output Size': 'Full size',\n",
259
  " '5. Time Zone': 'US/Eastern'}"
260
  ]
261
  },
262
- "execution_count": 6,
263
  "metadata": {},
264
  "output_type": "execute_result"
265
  }
@@ -270,7 +270,7 @@
270
  },
271
  {
272
  "cell_type": "code",
273
- "execution_count": 7,
274
  "metadata": {},
275
  "outputs": [],
276
  "source": [
@@ -289,7 +289,7 @@
289
  },
290
  {
291
  "cell_type": "code",
292
- "execution_count": 8,
293
  "metadata": {},
294
  "outputs": [],
295
  "source": [
@@ -316,7 +316,7 @@
316
  },
317
  {
318
  "cell_type": "code",
319
- "execution_count": 9,
320
  "metadata": {},
321
  "outputs": [],
322
  "source": [
@@ -382,7 +382,7 @@
382
  },
383
  {
384
  "cell_type": "code",
385
- "execution_count": 10,
386
  "metadata": {},
387
  "outputs": [],
388
  "source": [
@@ -394,7 +394,7 @@
394
  },
395
  {
396
  "cell_type": "code",
397
- "execution_count": 11,
398
  "metadata": {},
399
  "outputs": [],
400
  "source": [
@@ -403,13 +403,220 @@
403
  },
404
  {
405
  "cell_type": "code",
406
- "execution_count": 12,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  "metadata": {},
408
  "outputs": [],
409
  "source": [
410
  "data.reset_index(inplace=True)\n"
411
  ]
412
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  {
414
  "cell_type": "code",
415
  "execution_count": 13,
@@ -447,6 +654,15 @@
447
  " <tbody>\n",
448
  " <tr>\n",
449
  " <th>0</th>\n",
 
 
 
 
 
 
 
 
 
450
  " <td>2024-05-01</td>\n",
451
  " <td>182.00</td>\n",
452
  " <td>185.8600</td>\n",
@@ -455,7 +671,7 @@
455
  " <td>92829719.0</td>\n",
456
  " </tr>\n",
457
  " <tr>\n",
458
- " <th>1</th>\n",
459
  " <td>2024-04-30</td>\n",
460
  " <td>186.98</td>\n",
461
  " <td>190.9500</td>\n",
@@ -464,7 +680,7 @@
464
  " <td>127031787.0</td>\n",
465
  " </tr>\n",
466
  " <tr>\n",
467
- " <th>2</th>\n",
468
  " <td>2024-04-29</td>\n",
469
  " <td>188.42</td>\n",
470
  " <td>198.8700</td>\n",
@@ -473,7 +689,7 @@
473
  " <td>243869678.0</td>\n",
474
  " </tr>\n",
475
  " <tr>\n",
476
- " <th>3</th>\n",
477
  " <td>2024-04-26</td>\n",
478
  " <td>168.85</td>\n",
479
  " <td>172.1200</td>\n",
@@ -482,15 +698,6 @@
482
  " <td>109815725.0</td>\n",
483
  " </tr>\n",
484
  " <tr>\n",
485
- " <th>4</th>\n",
486
- " <td>2024-04-25</td>\n",
487
- " <td>158.96</td>\n",
488
- " <td>170.8800</td>\n",
489
- " <td>158.3600</td>\n",
490
- " <td>170.18</td>\n",
491
- " <td>126427521.0</td>\n",
492
- " </tr>\n",
493
- " <tr>\n",
494
  " <th>...</th>\n",
495
  " <td>...</td>\n",
496
  " <td>...</td>\n",
@@ -500,7 +707,7 @@
500
  " <td>...</td>\n",
501
  " </tr>\n",
502
  " <tr>\n",
503
- " <th>3479</th>\n",
504
  " <td>2010-07-06</td>\n",
505
  " <td>20.00</td>\n",
506
  " <td>20.0000</td>\n",
@@ -509,7 +716,7 @@
509
  " <td>6866900.0</td>\n",
510
  " </tr>\n",
511
  " <tr>\n",
512
- " <th>3480</th>\n",
513
  " <td>2010-07-02</td>\n",
514
  " <td>23.00</td>\n",
515
  " <td>23.1000</td>\n",
@@ -518,7 +725,7 @@
518
  " <td>5139800.0</td>\n",
519
  " </tr>\n",
520
  " <tr>\n",
521
- " <th>3481</th>\n",
522
  " <td>2010-07-01</td>\n",
523
  " <td>25.00</td>\n",
524
  " <td>25.9200</td>\n",
@@ -527,7 +734,7 @@
527
  " <td>8218800.0</td>\n",
528
  " </tr>\n",
529
  " <tr>\n",
530
- " <th>3482</th>\n",
531
  " <td>2010-06-30</td>\n",
532
  " <td>25.79</td>\n",
533
  " <td>30.4192</td>\n",
@@ -536,7 +743,7 @@
536
  " <td>17187100.0</td>\n",
537
  " </tr>\n",
538
  " <tr>\n",
539
- " <th>3483</th>\n",
540
  " <td>2010-06-29</td>\n",
541
  " <td>19.00</td>\n",
542
  " <td>25.0000</td>\n",
@@ -546,24 +753,24 @@
546
  " </tr>\n",
547
  " </tbody>\n",
548
  "</table>\n",
549
- "<p>3484 rows × 6 columns</p>\n",
550
  "</div>"
551
  ],
552
  "text/plain": [
553
  " date open high low close volume\n",
554
- "0 2024-05-01 182.00 185.8600 179.0100 179.99 92829719.0\n",
555
- "1 2024-04-30 186.98 190.9500 182.8401 183.28 127031787.0\n",
556
- "2 2024-04-29 188.42 198.8700 184.5400 194.05 243869678.0\n",
557
- "3 2024-04-26 168.85 172.1200 166.3700 168.29 109815725.0\n",
558
- "4 2024-04-25 158.96 170.8800 158.3600 170.18 126427521.0\n",
559
  "... ... ... ... ... ... ...\n",
560
- "3479 2010-07-06 20.00 20.0000 15.8300 16.11 6866900.0\n",
561
- "3480 2010-07-02 23.00 23.1000 18.7100 19.20 5139800.0\n",
562
- "3481 2010-07-01 25.00 25.9200 20.2700 21.96 8218800.0\n",
563
- "3482 2010-06-30 25.79 30.4192 23.3000 23.83 17187100.0\n",
564
- "3483 2010-06-29 19.00 25.0000 17.5400 23.89 18766300.0\n",
565
  "\n",
566
- "[3484 rows x 6 columns]"
567
  ]
568
  },
569
  "execution_count": 13,
@@ -575,6 +782,168 @@
575
  "data"
576
  ]
577
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
  {
579
  "cell_type": "code",
580
  "execution_count": null,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 17,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
20
  "import pandas_market_calendars as mcal\n",
21
  "import datetime\n",
22
  "import numpy as np\n",
23
+ "from datetime import datetime, timedelta\n"
24
  ]
25
  },
26
  {
 
34
  "text": [
35
  " 1. open 2. high 3. low 4. close 5. volume\n",
36
  "date \n",
37
+ "2024-05-02 182.86 184.60 176.0200 180.01 89148041.0\n",
38
  "2024-05-01 182.00 185.86 179.0100 179.99 92829719.0\n",
39
  "2024-04-30 186.98 190.95 182.8401 183.28 127031787.0\n",
40
  "2024-04-29 188.42 198.87 184.5400 194.05 243869678.0\n",
41
+ "2024-04-26 168.85 172.12 166.3700 168.29 109815725.0\n"
 
42
  ]
43
  }
44
  ],
 
56
  },
57
  {
58
  "cell_type": "code",
59
+ "execution_count": 3,
60
  "metadata": {},
61
  "outputs": [
62
  {
 
97
  " </thead>\n",
98
  " <tbody>\n",
99
  " <tr>\n",
100
+ " <th>2024-05-02</th>\n",
101
+ " <td>182.86</td>\n",
102
+ " <td>184.6000</td>\n",
103
+ " <td>176.0200</td>\n",
104
+ " <td>180.01</td>\n",
105
+ " <td>89148041.0</td>\n",
106
+ " </tr>\n",
107
+ " <tr>\n",
108
  " <th>2024-05-01</th>\n",
109
  " <td>182.00</td>\n",
110
  " <td>185.8600</td>\n",
 
137
  " <td>109815725.0</td>\n",
138
  " </tr>\n",
139
  " <tr>\n",
 
 
 
 
 
 
 
 
140
  " <th>...</th>\n",
141
  " <td>...</td>\n",
142
  " <td>...</td>\n",
 
186
  " </tr>\n",
187
  " </tbody>\n",
188
  "</table>\n",
189
+ "<p>3485 rows × 5 columns</p>\n",
190
  "</div>"
191
  ],
192
  "text/plain": [
193
  " 1. open 2. high 3. low 4. close 5. volume\n",
194
  "date \n",
195
+ "2024-05-02 182.86 184.6000 176.0200 180.01 89148041.0\n",
196
  "2024-05-01 182.00 185.8600 179.0100 179.99 92829719.0\n",
197
  "2024-04-30 186.98 190.9500 182.8401 183.28 127031787.0\n",
198
  "2024-04-29 188.42 198.8700 184.5400 194.05 243869678.0\n",
199
  "2024-04-26 168.85 172.1200 166.3700 168.29 109815725.0\n",
 
200
  "... ... ... ... ... ...\n",
201
  "2010-07-06 20.00 20.0000 15.8300 16.11 6866900.0\n",
202
  "2010-07-02 23.00 23.1000 18.7100 19.20 5139800.0\n",
 
204
  "2010-06-30 25.79 30.4192 23.3000 23.83 17187100.0\n",
205
  "2010-06-29 19.00 25.0000 17.5400 23.89 18766300.0\n",
206
  "\n",
207
+ "[3485 rows x 5 columns]"
208
  ]
209
  },
210
+ "execution_count": 3,
211
  "metadata": {},
212
  "output_type": "execute_result"
213
  }
 
218
  },
219
  {
220
  "cell_type": "code",
221
+ "execution_count": 4,
222
  "metadata": {},
223
  "outputs": [
224
  {
 
226
  "output_type": "stream",
227
  "text": [
228
  "<class 'pandas.core.frame.DataFrame'>\n",
229
+ "DatetimeIndex: 3485 entries, 2024-05-02 to 2010-06-29\n",
230
  "Data columns (total 5 columns):\n",
231
  " # Column Non-Null Count Dtype \n",
232
  "--- ------ -------------- ----- \n",
233
+ " 0 1. open 3485 non-null float64\n",
234
+ " 1 2. high 3485 non-null float64\n",
235
+ " 2 3. low 3485 non-null float64\n",
236
+ " 3 4. close 3485 non-null float64\n",
237
+ " 4 5. volume 3485 non-null float64\n",
238
  "dtypes: float64(5)\n",
239
+ "memory usage: 163.4 KB\n"
240
  ]
241
  }
242
  ],
 
246
  },
247
  {
248
  "cell_type": "code",
249
+ "execution_count": 5,
250
  "metadata": {},
251
  "outputs": [
252
  {
 
254
  "text/plain": [
255
  "{'1. Information': 'Daily Prices (open, high, low, close) and Volumes',\n",
256
  " '2. Symbol': 'TSLA',\n",
257
+ " '3. Last Refreshed': '2024-05-02',\n",
258
  " '4. Output Size': 'Full size',\n",
259
  " '5. Time Zone': 'US/Eastern'}"
260
  ]
261
  },
262
+ "execution_count": 5,
263
  "metadata": {},
264
  "output_type": "execute_result"
265
  }
 
270
  },
271
  {
272
  "cell_type": "code",
273
+ "execution_count": 6,
274
  "metadata": {},
275
  "outputs": [],
276
  "source": [
 
289
  },
290
  {
291
  "cell_type": "code",
292
+ "execution_count": 7,
293
  "metadata": {},
294
  "outputs": [],
295
  "source": [
 
316
  },
317
  {
318
  "cell_type": "code",
319
+ "execution_count": 8,
320
  "metadata": {},
321
  "outputs": [],
322
  "source": [
 
382
  },
383
  {
384
  "cell_type": "code",
385
+ "execution_count": 9,
386
  "metadata": {},
387
  "outputs": [],
388
  "source": [
 
394
  },
395
  {
396
  "cell_type": "code",
397
+ "execution_count": 10,
398
  "metadata": {},
399
  "outputs": [],
400
  "source": [
 
403
  },
404
  {
405
  "cell_type": "code",
406
+ "execution_count": 13,
407
+ "metadata": {},
408
+ "outputs": [
409
+ {
410
+ "data": {
411
+ "text/html": [
412
+ "<div>\n",
413
+ "<style scoped>\n",
414
+ " .dataframe tbody tr th:only-of-type {\n",
415
+ " vertical-align: middle;\n",
416
+ " }\n",
417
+ "\n",
418
+ " .dataframe tbody tr th {\n",
419
+ " vertical-align: top;\n",
420
+ " }\n",
421
+ "\n",
422
+ " .dataframe thead th {\n",
423
+ " text-align: right;\n",
424
+ " }\n",
425
+ "</style>\n",
426
+ "<table border=\"1\" class=\"dataframe\">\n",
427
+ " <thead>\n",
428
+ " <tr style=\"text-align: right;\">\n",
429
+ " <th></th>\n",
430
+ " <th>open</th>\n",
431
+ " <th>high</th>\n",
432
+ " <th>low</th>\n",
433
+ " <th>close</th>\n",
434
+ " <th>volume</th>\n",
435
+ " </tr>\n",
436
+ " <tr>\n",
437
+ " <th>date</th>\n",
438
+ " <th></th>\n",
439
+ " <th></th>\n",
440
+ " <th></th>\n",
441
+ " <th></th>\n",
442
+ " <th></th>\n",
443
+ " </tr>\n",
444
+ " </thead>\n",
445
+ " <tbody>\n",
446
+ " <tr>\n",
447
+ " <th>2024-05-02</th>\n",
448
+ " <td>182.86</td>\n",
449
+ " <td>184.60</td>\n",
450
+ " <td>176.0200</td>\n",
451
+ " <td>180.01</td>\n",
452
+ " <td>89148041.0</td>\n",
453
+ " </tr>\n",
454
+ " <tr>\n",
455
+ " <th>2024-05-01</th>\n",
456
+ " <td>182.00</td>\n",
457
+ " <td>185.86</td>\n",
458
+ " <td>179.0100</td>\n",
459
+ " <td>179.99</td>\n",
460
+ " <td>92829719.0</td>\n",
461
+ " </tr>\n",
462
+ " <tr>\n",
463
+ " <th>2024-04-30</th>\n",
464
+ " <td>186.98</td>\n",
465
+ " <td>190.95</td>\n",
466
+ " <td>182.8401</td>\n",
467
+ " <td>183.28</td>\n",
468
+ " <td>127031787.0</td>\n",
469
+ " </tr>\n",
470
+ " <tr>\n",
471
+ " <th>2024-04-29</th>\n",
472
+ " <td>188.42</td>\n",
473
+ " <td>198.87</td>\n",
474
+ " <td>184.5400</td>\n",
475
+ " <td>194.05</td>\n",
476
+ " <td>243869678.0</td>\n",
477
+ " </tr>\n",
478
+ " <tr>\n",
479
+ " <th>2024-04-26</th>\n",
480
+ " <td>168.85</td>\n",
481
+ " <td>172.12</td>\n",
482
+ " <td>166.3700</td>\n",
483
+ " <td>168.29</td>\n",
484
+ " <td>109815725.0</td>\n",
485
+ " </tr>\n",
486
+ " </tbody>\n",
487
+ "</table>\n",
488
+ "</div>"
489
+ ],
490
+ "text/plain": [
491
+ " open high low close volume\n",
492
+ "date \n",
493
+ "2024-05-02 182.86 184.60 176.0200 180.01 89148041.0\n",
494
+ "2024-05-01 182.00 185.86 179.0100 179.99 92829719.0\n",
495
+ "2024-04-30 186.98 190.95 182.8401 183.28 127031787.0\n",
496
+ "2024-04-29 188.42 198.87 184.5400 194.05 243869678.0\n",
497
+ "2024-04-26 168.85 172.12 166.3700 168.29 109815725.0"
498
+ ]
499
+ },
500
+ "execution_count": 13,
501
+ "metadata": {},
502
+ "output_type": "execute_result"
503
+ }
504
+ ],
505
+ "source": [
506
+ "data.head()"
507
+ ]
508
+ },
509
+ {
510
+ "cell_type": "code",
511
+ "execution_count": 14,
512
  "metadata": {},
513
  "outputs": [],
514
  "source": [
515
  "data.reset_index(inplace=True)\n"
516
  ]
517
  },
518
+ {
519
+ "cell_type": "code",
520
+ "execution_count": 15,
521
+ "metadata": {},
522
+ "outputs": [
523
+ {
524
+ "data": {
525
+ "text/html": [
526
+ "<div>\n",
527
+ "<style scoped>\n",
528
+ " .dataframe tbody tr th:only-of-type {\n",
529
+ " vertical-align: middle;\n",
530
+ " }\n",
531
+ "\n",
532
+ " .dataframe tbody tr th {\n",
533
+ " vertical-align: top;\n",
534
+ " }\n",
535
+ "\n",
536
+ " .dataframe thead th {\n",
537
+ " text-align: right;\n",
538
+ " }\n",
539
+ "</style>\n",
540
+ "<table border=\"1\" class=\"dataframe\">\n",
541
+ " <thead>\n",
542
+ " <tr style=\"text-align: right;\">\n",
543
+ " <th></th>\n",
544
+ " <th>date</th>\n",
545
+ " <th>open</th>\n",
546
+ " <th>high</th>\n",
547
+ " <th>low</th>\n",
548
+ " <th>close</th>\n",
549
+ " <th>volume</th>\n",
550
+ " </tr>\n",
551
+ " </thead>\n",
552
+ " <tbody>\n",
553
+ " <tr>\n",
554
+ " <th>0</th>\n",
555
+ " <td>2024-05-02</td>\n",
556
+ " <td>182.86</td>\n",
557
+ " <td>184.60</td>\n",
558
+ " <td>176.0200</td>\n",
559
+ " <td>180.01</td>\n",
560
+ " <td>89148041.0</td>\n",
561
+ " </tr>\n",
562
+ " <tr>\n",
563
+ " <th>1</th>\n",
564
+ " <td>2024-05-01</td>\n",
565
+ " <td>182.00</td>\n",
566
+ " <td>185.86</td>\n",
567
+ " <td>179.0100</td>\n",
568
+ " <td>179.99</td>\n",
569
+ " <td>92829719.0</td>\n",
570
+ " </tr>\n",
571
+ " <tr>\n",
572
+ " <th>2</th>\n",
573
+ " <td>2024-04-30</td>\n",
574
+ " <td>186.98</td>\n",
575
+ " <td>190.95</td>\n",
576
+ " <td>182.8401</td>\n",
577
+ " <td>183.28</td>\n",
578
+ " <td>127031787.0</td>\n",
579
+ " </tr>\n",
580
+ " <tr>\n",
581
+ " <th>3</th>\n",
582
+ " <td>2024-04-29</td>\n",
583
+ " <td>188.42</td>\n",
584
+ " <td>198.87</td>\n",
585
+ " <td>184.5400</td>\n",
586
+ " <td>194.05</td>\n",
587
+ " <td>243869678.0</td>\n",
588
+ " </tr>\n",
589
+ " <tr>\n",
590
+ " <th>4</th>\n",
591
+ " <td>2024-04-26</td>\n",
592
+ " <td>168.85</td>\n",
593
+ " <td>172.12</td>\n",
594
+ " <td>166.3700</td>\n",
595
+ " <td>168.29</td>\n",
596
+ " <td>109815725.0</td>\n",
597
+ " </tr>\n",
598
+ " </tbody>\n",
599
+ "</table>\n",
600
+ "</div>"
601
+ ],
602
+ "text/plain": [
603
+ " date open high low close volume\n",
604
+ "0 2024-05-02 182.86 184.60 176.0200 180.01 89148041.0\n",
605
+ "1 2024-05-01 182.00 185.86 179.0100 179.99 92829719.0\n",
606
+ "2 2024-04-30 186.98 190.95 182.8401 183.28 127031787.0\n",
607
+ "3 2024-04-29 188.42 198.87 184.5400 194.05 243869678.0\n",
608
+ "4 2024-04-26 168.85 172.12 166.3700 168.29 109815725.0"
609
+ ]
610
+ },
611
+ "execution_count": 15,
612
+ "metadata": {},
613
+ "output_type": "execute_result"
614
+ }
615
+ ],
616
+ "source": [
617
+ "data.head()"
618
+ ]
619
+ },
620
  {
621
  "cell_type": "code",
622
  "execution_count": 13,
 
654
  " <tbody>\n",
655
  " <tr>\n",
656
  " <th>0</th>\n",
657
+ " <td>2024-05-02</td>\n",
658
+ " <td>182.86</td>\n",
659
+ " <td>184.6000</td>\n",
660
+ " <td>176.0200</td>\n",
661
+ " <td>180.01</td>\n",
662
+ " <td>89148041.0</td>\n",
663
+ " </tr>\n",
664
+ " <tr>\n",
665
+ " <th>1</th>\n",
666
  " <td>2024-05-01</td>\n",
667
  " <td>182.00</td>\n",
668
  " <td>185.8600</td>\n",
 
671
  " <td>92829719.0</td>\n",
672
  " </tr>\n",
673
  " <tr>\n",
674
+ " <th>2</th>\n",
675
  " <td>2024-04-30</td>\n",
676
  " <td>186.98</td>\n",
677
  " <td>190.9500</td>\n",
 
680
  " <td>127031787.0</td>\n",
681
  " </tr>\n",
682
  " <tr>\n",
683
+ " <th>3</th>\n",
684
  " <td>2024-04-29</td>\n",
685
  " <td>188.42</td>\n",
686
  " <td>198.8700</td>\n",
 
689
  " <td>243869678.0</td>\n",
690
  " </tr>\n",
691
  " <tr>\n",
692
+ " <th>4</th>\n",
693
  " <td>2024-04-26</td>\n",
694
  " <td>168.85</td>\n",
695
  " <td>172.1200</td>\n",
 
698
  " <td>109815725.0</td>\n",
699
  " </tr>\n",
700
  " <tr>\n",
 
 
 
 
 
 
 
 
 
701
  " <th>...</th>\n",
702
  " <td>...</td>\n",
703
  " <td>...</td>\n",
 
707
  " <td>...</td>\n",
708
  " </tr>\n",
709
  " <tr>\n",
710
+ " <th>3480</th>\n",
711
  " <td>2010-07-06</td>\n",
712
  " <td>20.00</td>\n",
713
  " <td>20.0000</td>\n",
 
716
  " <td>6866900.0</td>\n",
717
  " </tr>\n",
718
  " <tr>\n",
719
+ " <th>3481</th>\n",
720
  " <td>2010-07-02</td>\n",
721
  " <td>23.00</td>\n",
722
  " <td>23.1000</td>\n",
 
725
  " <td>5139800.0</td>\n",
726
  " </tr>\n",
727
  " <tr>\n",
728
+ " <th>3482</th>\n",
729
  " <td>2010-07-01</td>\n",
730
  " <td>25.00</td>\n",
731
  " <td>25.9200</td>\n",
 
734
  " <td>8218800.0</td>\n",
735
  " </tr>\n",
736
  " <tr>\n",
737
+ " <th>3483</th>\n",
738
  " <td>2010-06-30</td>\n",
739
  " <td>25.79</td>\n",
740
  " <td>30.4192</td>\n",
 
743
  " <td>17187100.0</td>\n",
744
  " </tr>\n",
745
  " <tr>\n",
746
+ " <th>3484</th>\n",
747
  " <td>2010-06-29</td>\n",
748
  " <td>19.00</td>\n",
749
  " <td>25.0000</td>\n",
 
753
  " </tr>\n",
754
  " </tbody>\n",
755
  "</table>\n",
756
+ "<p>3485 rows × 6 columns</p>\n",
757
  "</div>"
758
  ],
759
  "text/plain": [
760
  " date open high low close volume\n",
761
+ "0 2024-05-02 182.86 184.6000 176.0200 180.01 89148041.0\n",
762
+ "1 2024-05-01 182.00 185.8600 179.0100 179.99 92829719.0\n",
763
+ "2 2024-04-30 186.98 190.9500 182.8401 183.28 127031787.0\n",
764
+ "3 2024-04-29 188.42 198.8700 184.5400 194.05 243869678.0\n",
765
+ "4 2024-04-26 168.85 172.1200 166.3700 168.29 109815725.0\n",
766
  "... ... ... ... ... ... ...\n",
767
+ "3480 2010-07-06 20.00 20.0000 15.8300 16.11 6866900.0\n",
768
+ "3481 2010-07-02 23.00 23.1000 18.7100 19.20 5139800.0\n",
769
+ "3482 2010-07-01 25.00 25.9200 20.2700 21.96 8218800.0\n",
770
+ "3483 2010-06-30 25.79 30.4192 23.3000 23.83 17187100.0\n",
771
+ "3484 2010-06-29 19.00 25.0000 17.5400 23.89 18766300.0\n",
772
  "\n",
773
+ "[3485 rows x 6 columns]"
774
  ]
775
  },
776
  "execution_count": 13,
 
782
  "data"
783
  ]
784
  },
785
+ {
786
+ "cell_type": "code",
787
+ "execution_count": 42,
788
+ "metadata": {},
789
+ "outputs": [],
790
+ "source": [
791
+ "# Define the date range you're interested in\n",
792
+ "yesterday =datetime.now()-timedelta(days=1)\n",
793
+ "two_years_back = yesterday - timedelta(days=684)"
794
+ ]
795
+ },
796
+ {
797
+ "cell_type": "code",
798
+ "execution_count": 43,
799
+ "metadata": {},
800
+ "outputs": [],
801
+ "source": [
802
+ "# Filter the DataFrame to this range\n",
803
+ "filtered_df = data[(data['date'] >= two_years_back) & (data['date'] <= yesterday)]"
804
+ ]
805
+ },
806
+ {
807
+ "cell_type": "code",
808
+ "execution_count": 44,
809
+ "metadata": {},
810
+ "outputs": [
811
+ {
812
+ "data": {
813
+ "text/html": [
814
+ "<div>\n",
815
+ "<style scoped>\n",
816
+ " .dataframe tbody tr th:only-of-type {\n",
817
+ " vertical-align: middle;\n",
818
+ " }\n",
819
+ "\n",
820
+ " .dataframe tbody tr th {\n",
821
+ " vertical-align: top;\n",
822
+ " }\n",
823
+ "\n",
824
+ " .dataframe thead th {\n",
825
+ " text-align: right;\n",
826
+ " }\n",
827
+ "</style>\n",
828
+ "<table border=\"1\" class=\"dataframe\">\n",
829
+ " <thead>\n",
830
+ " <tr style=\"text-align: right;\">\n",
831
+ " <th></th>\n",
832
+ " <th>date</th>\n",
833
+ " <th>open</th>\n",
834
+ " <th>high</th>\n",
835
+ " <th>low</th>\n",
836
+ " <th>close</th>\n",
837
+ " <th>volume</th>\n",
838
+ " </tr>\n",
839
+ " </thead>\n",
840
+ " <tbody>\n",
841
+ " <tr>\n",
842
+ " <th>0</th>\n",
843
+ " <td>2024-05-02</td>\n",
844
+ " <td>182.86</td>\n",
845
+ " <td>184.60</td>\n",
846
+ " <td>176.0200</td>\n",
847
+ " <td>180.01</td>\n",
848
+ " <td>89148041.0</td>\n",
849
+ " </tr>\n",
850
+ " <tr>\n",
851
+ " <th>1</th>\n",
852
+ " <td>2024-05-01</td>\n",
853
+ " <td>182.00</td>\n",
854
+ " <td>185.86</td>\n",
855
+ " <td>179.0100</td>\n",
856
+ " <td>179.99</td>\n",
857
+ " <td>92829719.0</td>\n",
858
+ " </tr>\n",
859
+ " <tr>\n",
860
+ " <th>2</th>\n",
861
+ " <td>2024-04-30</td>\n",
862
+ " <td>186.98</td>\n",
863
+ " <td>190.95</td>\n",
864
+ " <td>182.8401</td>\n",
865
+ " <td>183.28</td>\n",
866
+ " <td>127031787.0</td>\n",
867
+ " </tr>\n",
868
+ " <tr>\n",
869
+ " <th>3</th>\n",
870
+ " <td>2024-04-29</td>\n",
871
+ " <td>188.42</td>\n",
872
+ " <td>198.87</td>\n",
873
+ " <td>184.5400</td>\n",
874
+ " <td>194.05</td>\n",
875
+ " <td>243869678.0</td>\n",
876
+ " </tr>\n",
877
+ " <tr>\n",
878
+ " <th>4</th>\n",
879
+ " <td>2024-04-26</td>\n",
880
+ " <td>168.85</td>\n",
881
+ " <td>172.12</td>\n",
882
+ " <td>166.3700</td>\n",
883
+ " <td>168.29</td>\n",
884
+ " <td>109815725.0</td>\n",
885
+ " </tr>\n",
886
+ " </tbody>\n",
887
+ "</table>\n",
888
+ "</div>"
889
+ ],
890
+ "text/plain": [
891
+ " date open high low close volume\n",
892
+ "0 2024-05-02 182.86 184.60 176.0200 180.01 89148041.0\n",
893
+ "1 2024-05-01 182.00 185.86 179.0100 179.99 92829719.0\n",
894
+ "2 2024-04-30 186.98 190.95 182.8401 183.28 127031787.0\n",
895
+ "3 2024-04-29 188.42 198.87 184.5400 194.05 243869678.0\n",
896
+ "4 2024-04-26 168.85 172.12 166.3700 168.29 109815725.0"
897
+ ]
898
+ },
899
+ "execution_count": 44,
900
+ "metadata": {},
901
+ "output_type": "execute_result"
902
+ }
903
+ ],
904
+ "source": [
905
+ "filtered_df.head()"
906
+ ]
907
+ },
908
+ {
909
+ "cell_type": "code",
910
+ "execution_count": 45,
911
+ "metadata": {},
912
+ "outputs": [
913
+ {
914
+ "name": "stdout",
915
+ "output_type": "stream",
916
+ "text": [
917
+ "2022-06-21 00:00:00\n",
918
+ "2024-05-02 00:00:00\n"
919
+ ]
920
+ }
921
+ ],
922
+ "source": [
923
+ "print(filtered_df['date'].min())\n",
924
+ "print(filtered_df['date'].max())"
925
+ ]
926
+ },
927
+ {
928
+ "cell_type": "code",
929
+ "execution_count": 46,
930
+ "metadata": {},
931
+ "outputs": [
932
+ {
933
+ "data": {
934
+ "text/plain": [
935
+ "(470, 6)"
936
+ ]
937
+ },
938
+ "execution_count": 46,
939
+ "metadata": {},
940
+ "output_type": "execute_result"
941
+ }
942
+ ],
943
+ "source": [
944
+ "filtered_df.shape"
945
+ ]
946
+ },
947
  {
948
  "cell_type": "code",
949
  "execution_count": null,