fklitt commited on
Commit
aa37877
·
1 Parent(s): a808231

Updated_13

Browse files
Stocks news prediction/Notebooks/1_historical_news.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 11,
6
  "metadata": {},
7
  "outputs": [
8
  {
@@ -11,7 +11,7 @@
11
  "True"
12
  ]
13
  },
14
- "execution_count": 11,
15
  "metadata": {},
16
  "output_type": "execute_result"
17
  }
@@ -38,23 +38,23 @@
38
  "name": "stdout",
39
  "output_type": "stream",
40
  "text": [
41
- "Fetched 50 articles from 2022-05-09 to 2022-06-28\n",
42
- "Fetched 50 articles from 2022-06-29 to 2022-08-18\n",
43
- "Fetched 50 articles from 2022-08-19 to 2022-10-08\n",
44
- "Fetched 50 articles from 2022-10-09 to 2022-11-28\n",
45
- "Fetched 50 articles from 2022-11-29 to 2023-01-18\n",
46
  "Rate limit reached. Waiting to retry...\n",
47
- "Fetched 50 articles from 2023-01-19 to 2023-03-10\n",
48
- "Fetched 50 articles from 2023-03-11 to 2023-04-30\n",
49
- "Fetched 50 articles from 2023-05-01 to 2023-06-20\n",
50
- "Fetched 50 articles from 2023-06-21 to 2023-08-10\n",
51
- "Fetched 50 articles from 2023-08-11 to 2023-09-30\n",
52
  "Rate limit reached. Waiting to retry...\n",
53
- "Fetched 50 articles from 2023-10-01 to 2023-11-20\n",
54
- "Fetched 50 articles from 2023-11-21 to 2024-01-10\n",
55
- "Fetched 50 articles from 2024-01-11 to 2024-03-01\n",
56
- "Fetched 50 articles from 2024-03-02 to 2024-04-21\n",
57
- "Fetched 50 articles from 2024-04-22 to 2024-05-08\n",
58
  "Total articles fetched: 750\n"
59
  ]
60
  }
@@ -121,7 +121,7 @@
121
  },
122
  {
123
  "cell_type": "code",
124
- "execution_count": 6,
125
  "metadata": {},
126
  "outputs": [],
127
  "source": [
@@ -131,7 +131,7 @@
131
  },
132
  {
133
  "cell_type": "code",
134
- "execution_count": 7,
135
  "metadata": {},
136
  "outputs": [
137
  {
@@ -139,13 +139,13 @@
139
  "output_type": "stream",
140
  "text": [
141
  "<class 'pandas.core.frame.DataFrame'>\n",
142
- "RangeIndex: 75 entries, 0 to 74\n",
143
  "Data columns (total 3 columns):\n",
144
  " # Column Non-Null Count Dtype \n",
145
  "--- ------ -------------- ----- \n",
146
- " 0 date 75 non-null object \n",
147
- " 1 ticker 75 non-null object \n",
148
- " 2 sentiment 75 non-null float64\n",
149
  "dtypes: float64(1), object(2)\n",
150
  "memory usage: 1.9+ KB\n"
151
  ]
@@ -157,7 +157,7 @@
157
  },
158
  {
159
  "cell_type": "code",
160
- "execution_count": 8,
161
  "metadata": {},
162
  "outputs": [
163
  {
@@ -189,33 +189,33 @@
189
  " <tbody>\n",
190
  " <tr>\n",
191
  " <th>0</th>\n",
192
- " <td>2022-06-24</td>\n",
193
  " <td>TSLA</td>\n",
194
- " <td>-0.084224</td>\n",
195
  " </tr>\n",
196
  " <tr>\n",
197
  " <th>1</th>\n",
198
- " <td>2022-06-25</td>\n",
199
  " <td>TSLA</td>\n",
200
- " <td>0.250000</td>\n",
201
  " </tr>\n",
202
  " <tr>\n",
203
  " <th>2</th>\n",
204
- " <td>2022-06-26</td>\n",
205
  " <td>TSLA</td>\n",
206
- " <td>0.000556</td>\n",
207
  " </tr>\n",
208
  " <tr>\n",
209
  " <th>3</th>\n",
210
- " <td>2022-06-27</td>\n",
211
  " <td>TSLA</td>\n",
212
- " <td>0.150126</td>\n",
213
  " </tr>\n",
214
  " <tr>\n",
215
  " <th>4</th>\n",
216
- " <td>2022-06-28</td>\n",
217
  " <td>TSLA</td>\n",
218
- " <td>0.126298</td>\n",
219
  " </tr>\n",
220
  " </tbody>\n",
221
  "</table>\n",
@@ -223,14 +223,14 @@
223
  ],
224
  "text/plain": [
225
  " date ticker sentiment\n",
226
- "0 2022-06-24 TSLA -0.084224\n",
227
- "1 2022-06-25 TSLA 0.250000\n",
228
- "2 2022-06-26 TSLA 0.000556\n",
229
- "3 2022-06-27 TSLA 0.150126\n",
230
- "4 2022-06-28 TSLA 0.126298"
231
  ]
232
  },
233
- "execution_count": 8,
234
  "metadata": {},
235
  "output_type": "execute_result"
236
  }
@@ -241,7 +241,7 @@
241
  },
242
  {
243
  "cell_type": "code",
244
- "execution_count": 9,
245
  "metadata": {},
246
  "outputs": [],
247
  "source": [
@@ -250,7 +250,7 @@
250
  },
251
  {
252
  "cell_type": "code",
253
- "execution_count": 10,
254
  "metadata": {},
255
  "outputs": [],
256
  "source": [
@@ -260,7 +260,7 @@
260
  },
261
  {
262
  "cell_type": "code",
263
- "execution_count": 12,
264
  "metadata": {},
265
  "outputs": [],
266
  "source": [
@@ -269,7 +269,7 @@
269
  },
270
  {
271
  "cell_type": "code",
272
- "execution_count": 13,
273
  "metadata": {},
274
  "outputs": [],
275
  "source": [
@@ -278,7 +278,7 @@
278
  },
279
  {
280
  "cell_type": "code",
281
- "execution_count": 14,
282
  "metadata": {},
283
  "outputs": [
284
  {
@@ -310,39 +310,39 @@
310
  " </thead>\n",
311
  " <tbody>\n",
312
  " <tr>\n",
313
- " <th>74</th>\n",
314
- " <td>2024-05-08</td>\n",
315
- " <td>TSLA</td>\n",
316
- " <td>0.010694</td>\n",
317
- " <td>0.010694</td>\n",
318
- " </tr>\n",
319
- " <tr>\n",
320
  " <th>73</th>\n",
321
- " <td>2024-05-07</td>\n",
322
  " <td>TSLA</td>\n",
323
- " <td>0.032778</td>\n",
324
- " <td>0.016215</td>\n",
325
  " </tr>\n",
326
  " <tr>\n",
327
  " <th>72</th>\n",
328
- " <td>2024-05-06</td>\n",
329
  " <td>TSLA</td>\n",
330
- " <td>0.152492</td>\n",
331
- " <td>0.050285</td>\n",
332
  " </tr>\n",
333
  " <tr>\n",
334
  " <th>71</th>\n",
335
- " <td>2024-05-05</td>\n",
336
  " <td>TSLA</td>\n",
337
- " <td>0.036190</td>\n",
338
- " <td>0.046761</td>\n",
339
  " </tr>\n",
340
  " <tr>\n",
341
  " <th>70</th>\n",
342
- " <td>2024-05-04</td>\n",
 
 
 
 
 
 
 
343
  " <td>TSLA</td>\n",
344
- " <td>0.062665</td>\n",
345
- " <td>0.050737</td>\n",
346
  " </tr>\n",
347
  " </tbody>\n",
348
  "</table>\n",
@@ -350,14 +350,14 @@
350
  ],
351
  "text/plain": [
352
  " date ticker sentiment exp_mean_7_days\n",
353
- "74 2024-05-08 TSLA 0.010694 0.010694\n",
354
- "73 2024-05-07 TSLA 0.032778 0.016215\n",
355
- "72 2024-05-06 TSLA 0.152492 0.050285\n",
356
- "71 2024-05-05 TSLA 0.036190 0.046761\n",
357
- "70 2024-05-04 TSLA 0.062665 0.050737"
358
  ]
359
  },
360
- "execution_count": 14,
361
  "metadata": {},
362
  "output_type": "execute_result"
363
  }
@@ -368,7 +368,7 @@
368
  },
369
  {
370
  "cell_type": "code",
371
- "execution_count": 15,
372
  "metadata": {},
373
  "outputs": [
374
  {
@@ -401,38 +401,38 @@
401
  " <tbody>\n",
402
  " <tr>\n",
403
  " <th>4</th>\n",
404
- " <td>2022-06-28</td>\n",
405
  " <td>TSLA</td>\n",
406
- " <td>0.126298</td>\n",
407
- " <td>0.079978</td>\n",
408
  " </tr>\n",
409
  " <tr>\n",
410
  " <th>3</th>\n",
411
- " <td>2022-06-27</td>\n",
412
  " <td>TSLA</td>\n",
413
- " <td>0.150126</td>\n",
414
- " <td>0.097515</td>\n",
415
  " </tr>\n",
416
  " <tr>\n",
417
  " <th>2</th>\n",
418
- " <td>2022-06-26</td>\n",
419
  " <td>TSLA</td>\n",
420
- " <td>0.000556</td>\n",
421
- " <td>0.073275</td>\n",
422
  " </tr>\n",
423
  " <tr>\n",
424
  " <th>1</th>\n",
425
- " <td>2022-06-25</td>\n",
426
  " <td>TSLA</td>\n",
427
- " <td>0.250000</td>\n",
428
- " <td>0.117456</td>\n",
429
  " </tr>\n",
430
  " <tr>\n",
431
  " <th>0</th>\n",
432
- " <td>2022-06-24</td>\n",
433
  " <td>TSLA</td>\n",
434
- " <td>-0.084224</td>\n",
435
- " <td>0.067036</td>\n",
436
  " </tr>\n",
437
  " </tbody>\n",
438
  "</table>\n",
@@ -440,14 +440,14 @@
440
  ],
441
  "text/plain": [
442
  " date ticker sentiment exp_mean_7_days\n",
443
- "4 2022-06-28 TSLA 0.126298 0.079978\n",
444
- "3 2022-06-27 TSLA 0.150126 0.097515\n",
445
- "2 2022-06-26 TSLA 0.000556 0.073275\n",
446
- "1 2022-06-25 TSLA 0.250000 0.117456\n",
447
- "0 2022-06-24 TSLA -0.084224 0.067036"
448
  ]
449
  },
450
- "execution_count": 15,
451
  "metadata": {},
452
  "output_type": "execute_result"
453
  }
@@ -458,15 +458,15 @@
458
  },
459
  {
460
  "cell_type": "code",
461
- "execution_count": 16,
462
  "metadata": {},
463
  "outputs": [
464
  {
465
  "name": "stdout",
466
  "output_type": "stream",
467
  "text": [
468
- "2022-06-24\n",
469
- "2024-05-08\n"
470
  ]
471
  }
472
  ],
@@ -477,7 +477,7 @@
477
  },
478
  {
479
  "cell_type": "code",
480
- "execution_count": 17,
481
  "metadata": {},
482
  "outputs": [
483
  {
@@ -494,16 +494,16 @@
494
  },
495
  {
496
  "cell_type": "code",
497
- "execution_count": 18,
498
  "metadata": {},
499
  "outputs": [
500
  {
501
  "data": {
502
  "text/plain": [
503
- "(75, 4)"
504
  ]
505
  },
506
- "execution_count": 18,
507
  "metadata": {},
508
  "output_type": "execute_result"
509
  }
@@ -514,7 +514,7 @@
514
  },
515
  {
516
  "cell_type": "code",
517
- "execution_count": 19,
518
  "metadata": {},
519
  "outputs": [],
520
  "source": [
@@ -523,7 +523,7 @@
523
  },
524
  {
525
  "cell_type": "code",
526
- "execution_count": 20,
527
  "metadata": {},
528
  "outputs": [
529
  {
@@ -532,7 +532,7 @@
532
  "(0, 4)"
533
  ]
534
  },
535
- "execution_count": 20,
536
  "metadata": {},
537
  "output_type": "execute_result"
538
  }
@@ -543,7 +543,7 @@
543
  },
544
  {
545
  "cell_type": "code",
546
- "execution_count": 21,
547
  "metadata": {},
548
  "outputs": [
549
  {
@@ -575,39 +575,39 @@
575
  " </thead>\n",
576
  " <tbody>\n",
577
  " <tr>\n",
578
- " <th>74</th>\n",
579
- " <td>2024-05-08</td>\n",
580
- " <td>TSLA</td>\n",
581
- " <td>0.010694</td>\n",
582
- " <td>0.010694</td>\n",
583
- " </tr>\n",
584
- " <tr>\n",
585
  " <th>73</th>\n",
586
- " <td>2024-05-07</td>\n",
587
  " <td>TSLA</td>\n",
588
- " <td>0.032778</td>\n",
589
- " <td>0.016215</td>\n",
590
  " </tr>\n",
591
  " <tr>\n",
592
  " <th>72</th>\n",
593
- " <td>2024-05-06</td>\n",
594
  " <td>TSLA</td>\n",
595
- " <td>0.152492</td>\n",
596
- " <td>0.050285</td>\n",
597
  " </tr>\n",
598
  " <tr>\n",
599
  " <th>71</th>\n",
600
- " <td>2024-05-05</td>\n",
601
  " <td>TSLA</td>\n",
602
- " <td>0.036190</td>\n",
603
- " <td>0.046761</td>\n",
604
  " </tr>\n",
605
  " <tr>\n",
606
  " <th>70</th>\n",
607
- " <td>2024-05-04</td>\n",
 
 
 
 
 
 
 
608
  " <td>TSLA</td>\n",
609
- " <td>0.062665</td>\n",
610
- " <td>0.050737</td>\n",
611
  " </tr>\n",
612
  " </tbody>\n",
613
  "</table>\n",
@@ -615,14 +615,14 @@
615
  ],
616
  "text/plain": [
617
  " date ticker sentiment exp_mean_7_days\n",
618
- "74 2024-05-08 TSLA 0.010694 0.010694\n",
619
- "73 2024-05-07 TSLA 0.032778 0.016215\n",
620
- "72 2024-05-06 TSLA 0.152492 0.050285\n",
621
- "71 2024-05-05 TSLA 0.036190 0.046761\n",
622
- "70 2024-05-04 TSLA 0.062665 0.050737"
623
  ]
624
  },
625
- "execution_count": 21,
626
  "metadata": {},
627
  "output_type": "execute_result"
628
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
  {
 
11
  "True"
12
  ]
13
  },
14
+ "execution_count": 1,
15
  "metadata": {},
16
  "output_type": "execute_result"
17
  }
 
38
  "name": "stdout",
39
  "output_type": "stream",
40
  "text": [
41
+ "Fetched 50 articles from 2022-05-14 to 2022-07-03\n",
42
+ "Fetched 50 articles from 2022-07-04 to 2022-08-23\n",
43
+ "Fetched 50 articles from 2022-08-24 to 2022-10-13\n",
44
+ "Fetched 50 articles from 2022-10-14 to 2022-12-03\n",
45
+ "Fetched 50 articles from 2022-12-04 to 2023-01-23\n",
46
  "Rate limit reached. Waiting to retry...\n",
47
+ "Fetched 50 articles from 2023-01-24 to 2023-03-15\n",
48
+ "Fetched 50 articles from 2023-03-16 to 2023-05-05\n",
49
+ "Fetched 50 articles from 2023-05-06 to 2023-06-25\n",
50
+ "Fetched 50 articles from 2023-06-26 to 2023-08-15\n",
51
+ "Fetched 50 articles from 2023-08-16 to 2023-10-05\n",
52
  "Rate limit reached. Waiting to retry...\n",
53
+ "Fetched 50 articles from 2023-10-06 to 2023-11-25\n",
54
+ "Fetched 50 articles from 2023-11-26 to 2024-01-15\n",
55
+ "Fetched 50 articles from 2024-01-16 to 2024-03-06\n",
56
+ "Fetched 50 articles from 2024-03-07 to 2024-04-26\n",
57
+ "Fetched 50 articles from 2024-04-27 to 2024-05-13\n",
58
  "Total articles fetched: 750\n"
59
  ]
60
  }
 
121
  },
122
  {
123
  "cell_type": "code",
124
+ "execution_count": 3,
125
  "metadata": {},
126
  "outputs": [],
127
  "source": [
 
131
  },
132
  {
133
  "cell_type": "code",
134
+ "execution_count": 4,
135
  "metadata": {},
136
  "outputs": [
137
  {
 
139
  "output_type": "stream",
140
  "text": [
141
  "<class 'pandas.core.frame.DataFrame'>\n",
142
+ "RangeIndex: 74 entries, 0 to 73\n",
143
  "Data columns (total 3 columns):\n",
144
  " # Column Non-Null Count Dtype \n",
145
  "--- ------ -------------- ----- \n",
146
+ " 0 date 74 non-null object \n",
147
+ " 1 ticker 74 non-null object \n",
148
+ " 2 sentiment 74 non-null float64\n",
149
  "dtypes: float64(1), object(2)\n",
150
  "memory usage: 1.9+ KB\n"
151
  ]
 
157
  },
158
  {
159
  "cell_type": "code",
160
+ "execution_count": 5,
161
  "metadata": {},
162
  "outputs": [
163
  {
 
189
  " <tbody>\n",
190
  " <tr>\n",
191
  " <th>0</th>\n",
192
+ " <td>2022-06-29</td>\n",
193
  " <td>TSLA</td>\n",
194
+ " <td>0.076381</td>\n",
195
  " </tr>\n",
196
  " <tr>\n",
197
  " <th>1</th>\n",
198
+ " <td>2022-06-30</td>\n",
199
  " <td>TSLA</td>\n",
200
+ " <td>0.084328</td>\n",
201
  " </tr>\n",
202
  " <tr>\n",
203
  " <th>2</th>\n",
204
+ " <td>2022-07-01</td>\n",
205
  " <td>TSLA</td>\n",
206
+ " <td>0.178838</td>\n",
207
  " </tr>\n",
208
  " <tr>\n",
209
  " <th>3</th>\n",
210
+ " <td>2022-07-02</td>\n",
211
  " <td>TSLA</td>\n",
212
+ " <td>0.037667</td>\n",
213
  " </tr>\n",
214
  " <tr>\n",
215
  " <th>4</th>\n",
216
+ " <td>2022-07-03</td>\n",
217
  " <td>TSLA</td>\n",
218
+ " <td>-0.375000</td>\n",
219
  " </tr>\n",
220
  " </tbody>\n",
221
  "</table>\n",
 
223
  ],
224
  "text/plain": [
225
  " date ticker sentiment\n",
226
+ "0 2022-06-29 TSLA 0.076381\n",
227
+ "1 2022-06-30 TSLA 0.084328\n",
228
+ "2 2022-07-01 TSLA 0.178838\n",
229
+ "3 2022-07-02 TSLA 0.037667\n",
230
+ "4 2022-07-03 TSLA -0.375000"
231
  ]
232
  },
233
+ "execution_count": 5,
234
  "metadata": {},
235
  "output_type": "execute_result"
236
  }
 
241
  },
242
  {
243
  "cell_type": "code",
244
+ "execution_count": 6,
245
  "metadata": {},
246
  "outputs": [],
247
  "source": [
 
250
  },
251
  {
252
  "cell_type": "code",
253
+ "execution_count": 7,
254
  "metadata": {},
255
  "outputs": [],
256
  "source": [
 
260
  },
261
  {
262
  "cell_type": "code",
263
+ "execution_count": 8,
264
  "metadata": {},
265
  "outputs": [],
266
  "source": [
 
269
  },
270
  {
271
  "cell_type": "code",
272
+ "execution_count": 9,
273
  "metadata": {},
274
  "outputs": [],
275
  "source": [
 
278
  },
279
  {
280
  "cell_type": "code",
281
+ "execution_count": 10,
282
  "metadata": {},
283
  "outputs": [
284
  {
 
310
  " </thead>\n",
311
  " <tbody>\n",
312
  " <tr>\n",
 
 
 
 
 
 
 
313
  " <th>73</th>\n",
314
+ " <td>2024-05-13</td>\n",
315
  " <td>TSLA</td>\n",
316
+ " <td>0.115443</td>\n",
317
+ " <td>0.115443</td>\n",
318
  " </tr>\n",
319
  " <tr>\n",
320
  " <th>72</th>\n",
321
+ " <td>2024-05-12</td>\n",
322
  " <td>TSLA</td>\n",
323
+ " <td>0.037500</td>\n",
324
+ " <td>0.095957</td>\n",
325
  " </tr>\n",
326
  " <tr>\n",
327
  " <th>71</th>\n",
328
+ " <td>2024-05-11</td>\n",
329
  " <td>TSLA</td>\n",
330
+ " <td>0.100000</td>\n",
331
+ " <td>0.096968</td>\n",
332
  " </tr>\n",
333
  " <tr>\n",
334
  " <th>70</th>\n",
335
+ " <td>2024-05-10</td>\n",
336
+ " <td>TSLA</td>\n",
337
+ " <td>0.069650</td>\n",
338
+ " <td>0.090138</td>\n",
339
+ " </tr>\n",
340
+ " <tr>\n",
341
+ " <th>69</th>\n",
342
+ " <td>2024-05-09</td>\n",
343
  " <td>TSLA</td>\n",
344
+ " <td>-0.031250</td>\n",
345
+ " <td>0.059791</td>\n",
346
  " </tr>\n",
347
  " </tbody>\n",
348
  "</table>\n",
 
350
  ],
351
  "text/plain": [
352
  " date ticker sentiment exp_mean_7_days\n",
353
+ "73 2024-05-13 TSLA 0.115443 0.115443\n",
354
+ "72 2024-05-12 TSLA 0.037500 0.095957\n",
355
+ "71 2024-05-11 TSLA 0.100000 0.096968\n",
356
+ "70 2024-05-10 TSLA 0.069650 0.090138\n",
357
+ "69 2024-05-09 TSLA -0.031250 0.059791"
358
  ]
359
  },
360
+ "execution_count": 10,
361
  "metadata": {},
362
  "output_type": "execute_result"
363
  }
 
368
  },
369
  {
370
  "cell_type": "code",
371
+ "execution_count": 11,
372
  "metadata": {},
373
  "outputs": [
374
  {
 
401
  " <tbody>\n",
402
  " <tr>\n",
403
  " <th>4</th>\n",
404
+ " <td>2022-07-03</td>\n",
405
  " <td>TSLA</td>\n",
406
+ " <td>-0.375000</td>\n",
407
+ " <td>-0.004703</td>\n",
408
  " </tr>\n",
409
  " <tr>\n",
410
  " <th>3</th>\n",
411
+ " <td>2022-07-02</td>\n",
412
  " <td>TSLA</td>\n",
413
+ " <td>0.037667</td>\n",
414
+ " <td>0.005889</td>\n",
415
  " </tr>\n",
416
  " <tr>\n",
417
  " <th>2</th>\n",
418
+ " <td>2022-07-01</td>\n",
419
  " <td>TSLA</td>\n",
420
+ " <td>0.178838</td>\n",
421
+ " <td>0.049127</td>\n",
422
  " </tr>\n",
423
  " <tr>\n",
424
  " <th>1</th>\n",
425
+ " <td>2022-06-30</td>\n",
426
  " <td>TSLA</td>\n",
427
+ " <td>0.084328</td>\n",
428
+ " <td>0.057927</td>\n",
429
  " </tr>\n",
430
  " <tr>\n",
431
  " <th>0</th>\n",
432
+ " <td>2022-06-29</td>\n",
433
  " <td>TSLA</td>\n",
434
+ " <td>0.076381</td>\n",
435
+ " <td>0.062540</td>\n",
436
  " </tr>\n",
437
  " </tbody>\n",
438
  "</table>\n",
 
440
  ],
441
  "text/plain": [
442
  " date ticker sentiment exp_mean_7_days\n",
443
+ "4 2022-07-03 TSLA -0.375000 -0.004703\n",
444
+ "3 2022-07-02 TSLA 0.037667 0.005889\n",
445
+ "2 2022-07-01 TSLA 0.178838 0.049127\n",
446
+ "1 2022-06-30 TSLA 0.084328 0.057927\n",
447
+ "0 2022-06-29 TSLA 0.076381 0.062540"
448
  ]
449
  },
450
+ "execution_count": 11,
451
  "metadata": {},
452
  "output_type": "execute_result"
453
  }
 
458
  },
459
  {
460
  "cell_type": "code",
461
+ "execution_count": 12,
462
  "metadata": {},
463
  "outputs": [
464
  {
465
  "name": "stdout",
466
  "output_type": "stream",
467
  "text": [
468
+ "2022-06-29\n",
469
+ "2024-05-13\n"
470
  ]
471
  }
472
  ],
 
477
  },
478
  {
479
  "cell_type": "code",
480
+ "execution_count": 13,
481
  "metadata": {},
482
  "outputs": [
483
  {
 
494
  },
495
  {
496
  "cell_type": "code",
497
+ "execution_count": 14,
498
  "metadata": {},
499
  "outputs": [
500
  {
501
  "data": {
502
  "text/plain": [
503
+ "(74, 4)"
504
  ]
505
  },
506
+ "execution_count": 14,
507
  "metadata": {},
508
  "output_type": "execute_result"
509
  }
 
514
  },
515
  {
516
  "cell_type": "code",
517
+ "execution_count": 15,
518
  "metadata": {},
519
  "outputs": [],
520
  "source": [
 
523
  },
524
  {
525
  "cell_type": "code",
526
+ "execution_count": 16,
527
  "metadata": {},
528
  "outputs": [
529
  {
 
532
  "(0, 4)"
533
  ]
534
  },
535
+ "execution_count": 16,
536
  "metadata": {},
537
  "output_type": "execute_result"
538
  }
 
543
  },
544
  {
545
  "cell_type": "code",
546
+ "execution_count": 17,
547
  "metadata": {},
548
  "outputs": [
549
  {
 
575
  " </thead>\n",
576
  " <tbody>\n",
577
  " <tr>\n",
 
 
 
 
 
 
 
578
  " <th>73</th>\n",
579
+ " <td>2024-05-13</td>\n",
580
  " <td>TSLA</td>\n",
581
+ " <td>0.115443</td>\n",
582
+ " <td>0.115443</td>\n",
583
  " </tr>\n",
584
  " <tr>\n",
585
  " <th>72</th>\n",
586
+ " <td>2024-05-12</td>\n",
587
  " <td>TSLA</td>\n",
588
+ " <td>0.037500</td>\n",
589
+ " <td>0.095957</td>\n",
590
  " </tr>\n",
591
  " <tr>\n",
592
  " <th>71</th>\n",
593
+ " <td>2024-05-11</td>\n",
594
  " <td>TSLA</td>\n",
595
+ " <td>0.100000</td>\n",
596
+ " <td>0.096968</td>\n",
597
  " </tr>\n",
598
  " <tr>\n",
599
  " <th>70</th>\n",
600
+ " <td>2024-05-10</td>\n",
601
+ " <td>TSLA</td>\n",
602
+ " <td>0.069650</td>\n",
603
+ " <td>0.090138</td>\n",
604
+ " </tr>\n",
605
+ " <tr>\n",
606
+ " <th>69</th>\n",
607
+ " <td>2024-05-09</td>\n",
608
  " <td>TSLA</td>\n",
609
+ " <td>-0.031250</td>\n",
610
+ " <td>0.059791</td>\n",
611
  " </tr>\n",
612
  " </tbody>\n",
613
  "</table>\n",
 
615
  ],
616
  "text/plain": [
617
  " date ticker sentiment exp_mean_7_days\n",
618
+ "73 2024-05-13 TSLA 0.115443 0.115443\n",
619
+ "72 2024-05-12 TSLA 0.037500 0.095957\n",
620
+ "71 2024-05-11 TSLA 0.100000 0.096968\n",
621
+ "70 2024-05-10 TSLA 0.069650 0.090138\n",
622
+ "69 2024-05-09 TSLA -0.031250 0.059791"
623
  ]
624
  },
625
+ "execution_count": 17,
626
  "metadata": {},
627
  "output_type": "execute_result"
628
  }
Stocks news prediction/Notebooks/2_historical_stock.ipynb CHANGED
@@ -45,13 +45,13 @@
45
  "name": "stdout",
46
  "output_type": "stream",
47
  "text": [
48
- " 1. open 2. high 3. low 4. close 5. volume ticker\n",
49
- "date \n",
50
- "2024-05-08 171.59 176.06 170.15 174.72 79969488.0 TSLA\n",
51
- "2024-05-07 182.40 183.26 177.40 177.81 75045854.0 TSLA\n",
52
- "2024-05-06 183.80 187.56 182.20 184.76 84390253.0 TSLA\n",
53
- "2024-05-03 182.10 184.78 178.42 181.19 75491539.0 TSLA\n",
54
- "2024-05-02 182.86 184.60 176.02 180.01 89148041.0 TSLA\n"
55
  ]
56
  }
57
  ],
@@ -80,7 +80,7 @@
80
  },
81
  {
82
  "cell_type": "code",
83
- "execution_count": 4,
84
  "metadata": {},
85
  "outputs": [
86
  {
 
45
  "name": "stdout",
46
  "output_type": "stream",
47
  "text": [
48
+ " 1. open 2. high 3. low 4. close 5. volume ticker\n",
49
+ "date \n",
50
+ "2024-05-13 170.00 175.4000 169.00 171.89 67018903.0 TSLA\n",
51
+ "2024-05-10 173.05 173.0599 167.75 168.47 72627178.0 TSLA\n",
52
+ "2024-05-09 175.01 175.6200 171.37 171.97 65950292.0 TSLA\n",
53
+ "2024-05-08 171.59 176.0600 170.15 174.72 79969488.0 TSLA\n",
54
+ "2024-05-07 182.40 183.2600 177.40 177.81 75045854.0 TSLA\n"
55
  ]
56
  }
57
  ],
 
80
  },
81
  {
82
  "cell_type": "code",
83
+ "execution_count": 3,
84
  "metadata": {},
85
  "outputs": [
86
  {
Stocks news prediction/Notebooks/3_news_preprocessing.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 6,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -18,7 +18,7 @@
18
  },
19
  {
20
  "cell_type": "code",
21
- "execution_count": 7,
22
  "metadata": {},
23
  "outputs": [],
24
  "source": [
@@ -55,7 +55,7 @@
55
  },
56
  {
57
  "cell_type": "code",
58
- "execution_count": 10,
59
  "metadata": {},
60
  "outputs": [],
61
  "source": [
@@ -84,7 +84,7 @@
84
  "name": "python",
85
  "nbconvert_exporter": "python",
86
  "pygments_lexer": "ipython3",
87
- "version": "3.11.4"
88
  },
89
  "orig_nbformat": 4
90
  },
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
18
  },
19
  {
20
  "cell_type": "code",
21
+ "execution_count": 2,
22
  "metadata": {},
23
  "outputs": [],
24
  "source": [
 
55
  },
56
  {
57
  "cell_type": "code",
58
+ "execution_count": 3,
59
  "metadata": {},
60
  "outputs": [],
61
  "source": [
 
84
  "name": "python",
85
  "nbconvert_exporter": "python",
86
  "pygments_lexer": "ipython3",
87
+ "version": "3.11.9"
88
  },
89
  "orig_nbformat": 4
90
  },
Stocks news prediction/Notebooks/4_stock_preprocessing.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 2,
6
  "metadata": {},
7
  "outputs": [
8
  {
@@ -11,7 +11,7 @@
11
  "True"
12
  ]
13
  },
14
- "execution_count": 2,
15
  "metadata": {},
16
  "output_type": "execute_result"
17
  }
@@ -45,13 +45,13 @@
45
  "name": "stdout",
46
  "output_type": "stream",
47
  "text": [
48
- " 1. open 2. high 3. low 4. close 5. volume\n",
49
- "date \n",
50
- "2024-05-02 182.86 184.60 176.0200 180.01 89148041.0\n",
51
- "2024-05-01 182.00 185.86 179.0100 179.99 92829719.0\n",
52
- "2024-04-30 186.98 190.95 182.8401 183.28 127031787.0\n",
53
- "2024-04-29 188.42 198.87 184.5400 194.05 243869678.0\n",
54
- "2024-04-26 168.85 172.12 166.3700 168.29 109815725.0\n"
55
  ]
56
  }
57
  ],
@@ -68,7 +68,7 @@
68
  },
69
  {
70
  "cell_type": "code",
71
- "execution_count": 4,
72
  "metadata": {},
73
  "outputs": [
74
  {
@@ -76,17 +76,17 @@
76
  "output_type": "stream",
77
  "text": [
78
  "<class 'pandas.core.frame.DataFrame'>\n",
79
- "DatetimeIndex: 3485 entries, 2024-05-02 to 2010-06-29\n",
80
  "Data columns (total 5 columns):\n",
81
  " # Column Non-Null Count Dtype \n",
82
  "--- ------ -------------- ----- \n",
83
- " 0 1. open 3485 non-null float64\n",
84
- " 1 2. high 3485 non-null float64\n",
85
- " 2 3. low 3485 non-null float64\n",
86
- " 3 4. close 3485 non-null float64\n",
87
- " 4 5. volume 3485 non-null float64\n",
88
  "dtypes: float64(5)\n",
89
- "memory usage: 163.4 KB\n"
90
  ]
91
  }
92
  ],
@@ -97,7 +97,7 @@
97
  },
98
  {
99
  "cell_type": "code",
100
- "execution_count": 5,
101
  "metadata": {},
102
  "outputs": [
103
  {
@@ -105,12 +105,12 @@
105
  "text/plain": [
106
  "{'1. Information': 'Daily Prices (open, high, low, close) and Volumes',\n",
107
  " '2. Symbol': 'TSLA',\n",
108
- " '3. Last Refreshed': '2024-05-02',\n",
109
  " '4. Output Size': 'Full size',\n",
110
  " '5. Time Zone': 'US/Eastern'}"
111
  ]
112
  },
113
- "execution_count": 5,
114
  "metadata": {},
115
  "output_type": "execute_result"
116
  }
@@ -122,7 +122,7 @@
122
  },
123
  {
124
  "cell_type": "code",
125
- "execution_count": 6,
126
  "metadata": {},
127
  "outputs": [],
128
  "source": [
@@ -141,7 +141,7 @@
141
  },
142
  {
143
  "cell_type": "code",
144
- "execution_count": 7,
145
  "metadata": {},
146
  "outputs": [],
147
  "source": [
@@ -169,7 +169,7 @@
169
  },
170
  {
171
  "cell_type": "code",
172
- "execution_count": 8,
173
  "metadata": {},
174
  "outputs": [],
175
  "source": [
@@ -236,7 +236,7 @@
236
  },
237
  {
238
  "cell_type": "code",
239
- "execution_count": 9,
240
  "metadata": {},
241
  "outputs": [],
242
  "source": [
@@ -249,7 +249,7 @@
249
  },
250
  {
251
  "cell_type": "code",
252
- "execution_count": 10,
253
  "metadata": {},
254
  "outputs": [],
255
  "source": [
@@ -258,7 +258,7 @@
258
  },
259
  {
260
  "cell_type": "code",
261
- "execution_count": 13,
262
  "metadata": {},
263
  "outputs": [
264
  {
@@ -299,60 +299,60 @@
299
  " </thead>\n",
300
  " <tbody>\n",
301
  " <tr>\n",
302
- " <th>2024-05-02</th>\n",
303
- " <td>182.86</td>\n",
304
- " <td>184.60</td>\n",
305
- " <td>176.0200</td>\n",
306
- " <td>180.01</td>\n",
307
- " <td>89148041.0</td>\n",
308
  " </tr>\n",
309
  " <tr>\n",
310
- " <th>2024-05-01</th>\n",
311
- " <td>182.00</td>\n",
312
- " <td>185.86</td>\n",
313
- " <td>179.0100</td>\n",
314
- " <td>179.99</td>\n",
315
- " <td>92829719.0</td>\n",
316
  " </tr>\n",
317
  " <tr>\n",
318
- " <th>2024-04-30</th>\n",
319
- " <td>186.98</td>\n",
320
- " <td>190.95</td>\n",
321
- " <td>182.8401</td>\n",
322
- " <td>183.28</td>\n",
323
- " <td>127031787.0</td>\n",
324
  " </tr>\n",
325
  " <tr>\n",
326
- " <th>2024-04-29</th>\n",
327
- " <td>188.42</td>\n",
328
- " <td>198.87</td>\n",
329
- " <td>184.5400</td>\n",
330
- " <td>194.05</td>\n",
331
- " <td>243869678.0</td>\n",
332
  " </tr>\n",
333
  " <tr>\n",
334
- " <th>2024-04-26</th>\n",
335
- " <td>168.85</td>\n",
336
- " <td>172.12</td>\n",
337
- " <td>166.3700</td>\n",
338
- " <td>168.29</td>\n",
339
- " <td>109815725.0</td>\n",
340
  " </tr>\n",
341
  " </tbody>\n",
342
  "</table>\n",
343
  "</div>"
344
  ],
345
  "text/plain": [
346
- " open high low close volume\n",
347
- "date \n",
348
- "2024-05-02 182.86 184.60 176.0200 180.01 89148041.0\n",
349
- "2024-05-01 182.00 185.86 179.0100 179.99 92829719.0\n",
350
- "2024-04-30 186.98 190.95 182.8401 183.28 127031787.0\n",
351
- "2024-04-29 188.42 198.87 184.5400 194.05 243869678.0\n",
352
- "2024-04-26 168.85 172.12 166.3700 168.29 109815725.0"
353
  ]
354
  },
355
- "execution_count": 13,
356
  "metadata": {},
357
  "output_type": "execute_result"
358
  }
@@ -363,7 +363,7 @@
363
  },
364
  {
365
  "cell_type": "code",
366
- "execution_count": 14,
367
  "metadata": {},
368
  "outputs": [],
369
  "source": [
@@ -372,7 +372,7 @@
372
  },
373
  {
374
  "cell_type": "code",
375
- "execution_count": 15,
376
  "metadata": {},
377
  "outputs": [
378
  {
@@ -407,63 +407,63 @@
407
  " <tbody>\n",
408
  " <tr>\n",
409
  " <th>0</th>\n",
410
- " <td>2024-05-02</td>\n",
411
- " <td>182.86</td>\n",
412
- " <td>184.60</td>\n",
413
- " <td>176.0200</td>\n",
414
- " <td>180.01</td>\n",
415
- " <td>89148041.0</td>\n",
416
  " </tr>\n",
417
  " <tr>\n",
418
  " <th>1</th>\n",
419
- " <td>2024-05-01</td>\n",
420
- " <td>182.00</td>\n",
421
- " <td>185.86</td>\n",
422
- " <td>179.0100</td>\n",
423
- " <td>179.99</td>\n",
424
- " <td>92829719.0</td>\n",
425
  " </tr>\n",
426
  " <tr>\n",
427
  " <th>2</th>\n",
428
- " <td>2024-04-30</td>\n",
429
- " <td>186.98</td>\n",
430
- " <td>190.95</td>\n",
431
- " <td>182.8401</td>\n",
432
- " <td>183.28</td>\n",
433
- " <td>127031787.0</td>\n",
434
  " </tr>\n",
435
  " <tr>\n",
436
  " <th>3</th>\n",
437
- " <td>2024-04-29</td>\n",
438
- " <td>188.42</td>\n",
439
- " <td>198.87</td>\n",
440
- " <td>184.5400</td>\n",
441
- " <td>194.05</td>\n",
442
- " <td>243869678.0</td>\n",
443
  " </tr>\n",
444
  " <tr>\n",
445
  " <th>4</th>\n",
446
- " <td>2024-04-26</td>\n",
447
- " <td>168.85</td>\n",
448
- " <td>172.12</td>\n",
449
- " <td>166.3700</td>\n",
450
- " <td>168.29</td>\n",
451
- " <td>109815725.0</td>\n",
452
  " </tr>\n",
453
  " </tbody>\n",
454
  "</table>\n",
455
  "</div>"
456
  ],
457
  "text/plain": [
458
- " date open high low close volume\n",
459
- "0 2024-05-02 182.86 184.60 176.0200 180.01 89148041.0\n",
460
- "1 2024-05-01 182.00 185.86 179.0100 179.99 92829719.0\n",
461
- "2 2024-04-30 186.98 190.95 182.8401 183.28 127031787.0\n",
462
- "3 2024-04-29 188.42 198.87 184.5400 194.05 243869678.0\n",
463
- "4 2024-04-26 168.85 172.12 166.3700 168.29 109815725.0"
464
  ]
465
  },
466
- "execution_count": 15,
467
  "metadata": {},
468
  "output_type": "execute_result"
469
  }
@@ -474,7 +474,7 @@
474
  },
475
  {
476
  "cell_type": "code",
477
- "execution_count": 42,
478
  "metadata": {},
479
  "outputs": [],
480
  "source": [
@@ -485,7 +485,7 @@
485
  },
486
  {
487
  "cell_type": "code",
488
- "execution_count": 43,
489
  "metadata": {},
490
  "outputs": [],
491
  "source": [
@@ -495,7 +495,7 @@
495
  },
496
  {
497
  "cell_type": "code",
498
- "execution_count": 44,
499
  "metadata": {},
500
  "outputs": [
501
  {
@@ -530,63 +530,63 @@
530
  " <tbody>\n",
531
  " <tr>\n",
532
  " <th>0</th>\n",
533
- " <td>2024-05-02</td>\n",
534
- " <td>182.86</td>\n",
535
- " <td>184.60</td>\n",
536
- " <td>176.0200</td>\n",
537
- " <td>180.01</td>\n",
538
- " <td>89148041.0</td>\n",
539
  " </tr>\n",
540
  " <tr>\n",
541
  " <th>1</th>\n",
542
- " <td>2024-05-01</td>\n",
543
- " <td>182.00</td>\n",
544
- " <td>185.86</td>\n",
545
- " <td>179.0100</td>\n",
546
- " <td>179.99</td>\n",
547
- " <td>92829719.0</td>\n",
548
  " </tr>\n",
549
  " <tr>\n",
550
  " <th>2</th>\n",
551
- " <td>2024-04-30</td>\n",
552
- " <td>186.98</td>\n",
553
- " <td>190.95</td>\n",
554
- " <td>182.8401</td>\n",
555
- " <td>183.28</td>\n",
556
- " <td>127031787.0</td>\n",
557
  " </tr>\n",
558
  " <tr>\n",
559
  " <th>3</th>\n",
560
- " <td>2024-04-29</td>\n",
561
- " <td>188.42</td>\n",
562
- " <td>198.87</td>\n",
563
- " <td>184.5400</td>\n",
564
- " <td>194.05</td>\n",
565
- " <td>243869678.0</td>\n",
566
  " </tr>\n",
567
  " <tr>\n",
568
  " <th>4</th>\n",
569
- " <td>2024-04-26</td>\n",
570
- " <td>168.85</td>\n",
571
- " <td>172.12</td>\n",
572
- " <td>166.3700</td>\n",
573
- " <td>168.29</td>\n",
574
- " <td>109815725.0</td>\n",
575
  " </tr>\n",
576
  " </tbody>\n",
577
  "</table>\n",
578
  "</div>"
579
  ],
580
  "text/plain": [
581
- " date open high low close volume\n",
582
- "0 2024-05-02 182.86 184.60 176.0200 180.01 89148041.0\n",
583
- "1 2024-05-01 182.00 185.86 179.0100 179.99 92829719.0\n",
584
- "2 2024-04-30 186.98 190.95 182.8401 183.28 127031787.0\n",
585
- "3 2024-04-29 188.42 198.87 184.5400 194.05 243869678.0\n",
586
- "4 2024-04-26 168.85 172.12 166.3700 168.29 109815725.0"
587
  ]
588
  },
589
- "execution_count": 44,
590
  "metadata": {},
591
  "output_type": "execute_result"
592
  }
@@ -597,15 +597,15 @@
597
  },
598
  {
599
  "cell_type": "code",
600
- "execution_count": 45,
601
  "metadata": {},
602
  "outputs": [
603
  {
604
  "name": "stdout",
605
  "output_type": "stream",
606
  "text": [
607
- "2022-06-21 00:00:00\n",
608
- "2024-05-02 00:00:00\n"
609
  ]
610
  }
611
  ],
@@ -616,7 +616,7 @@
616
  },
617
  {
618
  "cell_type": "code",
619
- "execution_count": 46,
620
  "metadata": {},
621
  "outputs": [
622
  {
@@ -625,7 +625,7 @@
625
  "(470, 6)"
626
  ]
627
  },
628
- "execution_count": 46,
629
  "metadata": {},
630
  "output_type": "execute_result"
631
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
  {
 
11
  "True"
12
  ]
13
  },
14
+ "execution_count": 1,
15
  "metadata": {},
16
  "output_type": "execute_result"
17
  }
 
45
  "name": "stdout",
46
  "output_type": "stream",
47
  "text": [
48
+ " 1. open 2. high 3. low 4. close 5. volume\n",
49
+ "date \n",
50
+ "2024-05-13 170.00 175.4000 169.00 171.89 67018903.0\n",
51
+ "2024-05-10 173.05 173.0599 167.75 168.47 72627178.0\n",
52
+ "2024-05-09 175.01 175.6200 171.37 171.97 65950292.0\n",
53
+ "2024-05-08 171.59 176.0600 170.15 174.72 79969488.0\n",
54
+ "2024-05-07 182.40 183.2600 177.40 177.81 75045854.0\n"
55
  ]
56
  }
57
  ],
 
68
  },
69
  {
70
  "cell_type": "code",
71
+ "execution_count": 3,
72
  "metadata": {},
73
  "outputs": [
74
  {
 
76
  "output_type": "stream",
77
  "text": [
78
  "<class 'pandas.core.frame.DataFrame'>\n",
79
+ "DatetimeIndex: 3492 entries, 2024-05-13 to 2010-06-29\n",
80
  "Data columns (total 5 columns):\n",
81
  " # Column Non-Null Count Dtype \n",
82
  "--- ------ -------------- ----- \n",
83
+ " 0 1. open 3492 non-null float64\n",
84
+ " 1 2. high 3492 non-null float64\n",
85
+ " 2 3. low 3492 non-null float64\n",
86
+ " 3 4. close 3492 non-null float64\n",
87
+ " 4 5. volume 3492 non-null float64\n",
88
  "dtypes: float64(5)\n",
89
+ "memory usage: 163.7 KB\n"
90
  ]
91
  }
92
  ],
 
97
  },
98
  {
99
  "cell_type": "code",
100
+ "execution_count": 4,
101
  "metadata": {},
102
  "outputs": [
103
  {
 
105
  "text/plain": [
106
  "{'1. Information': 'Daily Prices (open, high, low, close) and Volumes',\n",
107
  " '2. Symbol': 'TSLA',\n",
108
+ " '3. Last Refreshed': '2024-05-13',\n",
109
  " '4. Output Size': 'Full size',\n",
110
  " '5. Time Zone': 'US/Eastern'}"
111
  ]
112
  },
113
+ "execution_count": 4,
114
  "metadata": {},
115
  "output_type": "execute_result"
116
  }
 
122
  },
123
  {
124
  "cell_type": "code",
125
+ "execution_count": 5,
126
  "metadata": {},
127
  "outputs": [],
128
  "source": [
 
141
  },
142
  {
143
  "cell_type": "code",
144
+ "execution_count": 6,
145
  "metadata": {},
146
  "outputs": [],
147
  "source": [
 
169
  },
170
  {
171
  "cell_type": "code",
172
+ "execution_count": 7,
173
  "metadata": {},
174
  "outputs": [],
175
  "source": [
 
236
  },
237
  {
238
  "cell_type": "code",
239
+ "execution_count": 8,
240
  "metadata": {},
241
  "outputs": [],
242
  "source": [
 
249
  },
250
  {
251
  "cell_type": "code",
252
+ "execution_count": 9,
253
  "metadata": {},
254
  "outputs": [],
255
  "source": [
 
258
  },
259
  {
260
  "cell_type": "code",
261
+ "execution_count": 10,
262
  "metadata": {},
263
  "outputs": [
264
  {
 
299
  " </thead>\n",
300
  " <tbody>\n",
301
  " <tr>\n",
302
+ " <th>2024-05-13</th>\n",
303
+ " <td>170.00</td>\n",
304
+ " <td>175.4000</td>\n",
305
+ " <td>169.00</td>\n",
306
+ " <td>171.89</td>\n",
307
+ " <td>67018903.0</td>\n",
308
  " </tr>\n",
309
  " <tr>\n",
310
+ " <th>2024-05-10</th>\n",
311
+ " <td>173.05</td>\n",
312
+ " <td>173.0599</td>\n",
313
+ " <td>167.75</td>\n",
314
+ " <td>168.47</td>\n",
315
+ " <td>72627178.0</td>\n",
316
  " </tr>\n",
317
  " <tr>\n",
318
+ " <th>2024-05-09</th>\n",
319
+ " <td>175.01</td>\n",
320
+ " <td>175.6200</td>\n",
321
+ " <td>171.37</td>\n",
322
+ " <td>171.97</td>\n",
323
+ " <td>65950292.0</td>\n",
324
  " </tr>\n",
325
  " <tr>\n",
326
+ " <th>2024-05-08</th>\n",
327
+ " <td>171.59</td>\n",
328
+ " <td>176.0600</td>\n",
329
+ " <td>170.15</td>\n",
330
+ " <td>174.72</td>\n",
331
+ " <td>79969488.0</td>\n",
332
  " </tr>\n",
333
  " <tr>\n",
334
+ " <th>2024-05-07</th>\n",
335
+ " <td>182.40</td>\n",
336
+ " <td>183.2600</td>\n",
337
+ " <td>177.40</td>\n",
338
+ " <td>177.81</td>\n",
339
+ " <td>75045854.0</td>\n",
340
  " </tr>\n",
341
  " </tbody>\n",
342
  "</table>\n",
343
  "</div>"
344
  ],
345
  "text/plain": [
346
+ " open high low close volume\n",
347
+ "date \n",
348
+ "2024-05-13 170.00 175.4000 169.00 171.89 67018903.0\n",
349
+ "2024-05-10 173.05 173.0599 167.75 168.47 72627178.0\n",
350
+ "2024-05-09 175.01 175.6200 171.37 171.97 65950292.0\n",
351
+ "2024-05-08 171.59 176.0600 170.15 174.72 79969488.0\n",
352
+ "2024-05-07 182.40 183.2600 177.40 177.81 75045854.0"
353
  ]
354
  },
355
+ "execution_count": 10,
356
  "metadata": {},
357
  "output_type": "execute_result"
358
  }
 
363
  },
364
  {
365
  "cell_type": "code",
366
+ "execution_count": 11,
367
  "metadata": {},
368
  "outputs": [],
369
  "source": [
 
372
  },
373
  {
374
  "cell_type": "code",
375
+ "execution_count": 12,
376
  "metadata": {},
377
  "outputs": [
378
  {
 
407
  " <tbody>\n",
408
  " <tr>\n",
409
  " <th>0</th>\n",
410
+ " <td>2024-05-13</td>\n",
411
+ " <td>170.00</td>\n",
412
+ " <td>175.4000</td>\n",
413
+ " <td>169.00</td>\n",
414
+ " <td>171.89</td>\n",
415
+ " <td>67018903.0</td>\n",
416
  " </tr>\n",
417
  " <tr>\n",
418
  " <th>1</th>\n",
419
+ " <td>2024-05-10</td>\n",
420
+ " <td>173.05</td>\n",
421
+ " <td>173.0599</td>\n",
422
+ " <td>167.75</td>\n",
423
+ " <td>168.47</td>\n",
424
+ " <td>72627178.0</td>\n",
425
  " </tr>\n",
426
  " <tr>\n",
427
  " <th>2</th>\n",
428
+ " <td>2024-05-09</td>\n",
429
+ " <td>175.01</td>\n",
430
+ " <td>175.6200</td>\n",
431
+ " <td>171.37</td>\n",
432
+ " <td>171.97</td>\n",
433
+ " <td>65950292.0</td>\n",
434
  " </tr>\n",
435
  " <tr>\n",
436
  " <th>3</th>\n",
437
+ " <td>2024-05-08</td>\n",
438
+ " <td>171.59</td>\n",
439
+ " <td>176.0600</td>\n",
440
+ " <td>170.15</td>\n",
441
+ " <td>174.72</td>\n",
442
+ " <td>79969488.0</td>\n",
443
  " </tr>\n",
444
  " <tr>\n",
445
  " <th>4</th>\n",
446
+ " <td>2024-05-07</td>\n",
447
+ " <td>182.40</td>\n",
448
+ " <td>183.2600</td>\n",
449
+ " <td>177.40</td>\n",
450
+ " <td>177.81</td>\n",
451
+ " <td>75045854.0</td>\n",
452
  " </tr>\n",
453
  " </tbody>\n",
454
  "</table>\n",
455
  "</div>"
456
  ],
457
  "text/plain": [
458
+ " date open high low close volume\n",
459
+ "0 2024-05-13 170.00 175.4000 169.00 171.89 67018903.0\n",
460
+ "1 2024-05-10 173.05 173.0599 167.75 168.47 72627178.0\n",
461
+ "2 2024-05-09 175.01 175.6200 171.37 171.97 65950292.0\n",
462
+ "3 2024-05-08 171.59 176.0600 170.15 174.72 79969488.0\n",
463
+ "4 2024-05-07 182.40 183.2600 177.40 177.81 75045854.0"
464
  ]
465
  },
466
+ "execution_count": 12,
467
  "metadata": {},
468
  "output_type": "execute_result"
469
  }
 
474
  },
475
  {
476
  "cell_type": "code",
477
+ "execution_count": 13,
478
  "metadata": {},
479
  "outputs": [],
480
  "source": [
 
485
  },
486
  {
487
  "cell_type": "code",
488
+ "execution_count": 14,
489
  "metadata": {},
490
  "outputs": [],
491
  "source": [
 
495
  },
496
  {
497
  "cell_type": "code",
498
+ "execution_count": 15,
499
  "metadata": {},
500
  "outputs": [
501
  {
 
530
  " <tbody>\n",
531
  " <tr>\n",
532
  " <th>0</th>\n",
533
+ " <td>2024-05-13</td>\n",
534
+ " <td>170.00</td>\n",
535
+ " <td>175.4000</td>\n",
536
+ " <td>169.00</td>\n",
537
+ " <td>171.89</td>\n",
538
+ " <td>67018903.0</td>\n",
539
  " </tr>\n",
540
  " <tr>\n",
541
  " <th>1</th>\n",
542
+ " <td>2024-05-10</td>\n",
543
+ " <td>173.05</td>\n",
544
+ " <td>173.0599</td>\n",
545
+ " <td>167.75</td>\n",
546
+ " <td>168.47</td>\n",
547
+ " <td>72627178.0</td>\n",
548
  " </tr>\n",
549
  " <tr>\n",
550
  " <th>2</th>\n",
551
+ " <td>2024-05-09</td>\n",
552
+ " <td>175.01</td>\n",
553
+ " <td>175.6200</td>\n",
554
+ " <td>171.37</td>\n",
555
+ " <td>171.97</td>\n",
556
+ " <td>65950292.0</td>\n",
557
  " </tr>\n",
558
  " <tr>\n",
559
  " <th>3</th>\n",
560
+ " <td>2024-05-08</td>\n",
561
+ " <td>171.59</td>\n",
562
+ " <td>176.0600</td>\n",
563
+ " <td>170.15</td>\n",
564
+ " <td>174.72</td>\n",
565
+ " <td>79969488.0</td>\n",
566
  " </tr>\n",
567
  " <tr>\n",
568
  " <th>4</th>\n",
569
+ " <td>2024-05-07</td>\n",
570
+ " <td>182.40</td>\n",
571
+ " <td>183.2600</td>\n",
572
+ " <td>177.40</td>\n",
573
+ " <td>177.81</td>\n",
574
+ " <td>75045854.0</td>\n",
575
  " </tr>\n",
576
  " </tbody>\n",
577
  "</table>\n",
578
  "</div>"
579
  ],
580
  "text/plain": [
581
+ " date open high low close volume\n",
582
+ "0 2024-05-13 170.00 175.4000 169.00 171.89 67018903.0\n",
583
+ "1 2024-05-10 173.05 173.0599 167.75 168.47 72627178.0\n",
584
+ "2 2024-05-09 175.01 175.6200 171.37 171.97 65950292.0\n",
585
+ "3 2024-05-08 171.59 176.0600 170.15 174.72 79969488.0\n",
586
+ "4 2024-05-07 182.40 183.2600 177.40 177.81 75045854.0"
587
  ]
588
  },
589
+ "execution_count": 15,
590
  "metadata": {},
591
  "output_type": "execute_result"
592
  }
 
597
  },
598
  {
599
  "cell_type": "code",
600
+ "execution_count": 16,
601
  "metadata": {},
602
  "outputs": [
603
  {
604
  "name": "stdout",
605
  "output_type": "stream",
606
  "text": [
607
+ "2022-06-30 00:00:00\n",
608
+ "2024-05-13 00:00:00\n"
609
  ]
610
  }
611
  ],
 
616
  },
617
  {
618
  "cell_type": "code",
619
+ "execution_count": 17,
620
  "metadata": {},
621
  "outputs": [
622
  {
 
625
  "(470, 6)"
626
  ]
627
  },
628
+ "execution_count": 17,
629
  "metadata": {},
630
  "output_type": "execute_result"
631
  }
Stocks news prediction/Notebooks/5_feature_pipeline.ipynb CHANGED
@@ -2,13 +2,14 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
 
12
  "Connected. Call `.close()` to terminate connection gracefully.\n",
13
  "\n",
14
  "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
@@ -42,19 +43,19 @@
42
  },
43
  {
44
  "cell_type": "code",
45
- "execution_count": 2,
46
  "metadata": {},
47
  "outputs": [
48
  {
49
  "name": "stdout",
50
  "output_type": "stream",
51
  "text": [
52
- " date 1. open 2. high 3. low 4. close 5. volume ticker\n",
53
- "0 2024-05-08 171.59 176.06 170.15 174.72 79969488.0 TSLA\n",
54
- "1 2024-05-07 182.40 183.26 177.40 177.81 75045854.0 TSLA\n",
55
- "2 2024-05-06 183.80 187.56 182.20 184.76 84390253.0 TSLA\n",
56
- "3 2024-05-03 182.10 184.78 178.42 181.19 75491539.0 TSLA\n",
57
- "4 2024-05-02 182.86 184.60 176.02 180.01 89148041.0 TSLA\n"
58
  ]
59
  }
60
  ],
@@ -66,7 +67,7 @@
66
  },
67
  {
68
  "cell_type": "code",
69
- "execution_count": 3,
70
  "metadata": {},
71
  "outputs": [],
72
  "source": [
@@ -79,7 +80,7 @@
79
  },
80
  {
81
  "cell_type": "code",
82
- "execution_count": 4,
83
  "metadata": {},
84
  "outputs": [
85
  {
@@ -115,52 +116,52 @@
115
  " <tbody>\n",
116
  " <tr>\n",
117
  " <th>0</th>\n",
118
- " <td>2024-05-08</td>\n",
119
- " <td>171.59</td>\n",
120
- " <td>176.0600</td>\n",
121
- " <td>170.15</td>\n",
122
- " <td>174.72</td>\n",
123
- " <td>79969488.0</td>\n",
124
  " <td>TSLA</td>\n",
125
  " </tr>\n",
126
  " <tr>\n",
127
  " <th>1</th>\n",
128
- " <td>2024-05-07</td>\n",
129
- " <td>182.40</td>\n",
130
- " <td>183.2600</td>\n",
131
- " <td>177.40</td>\n",
132
- " <td>177.81</td>\n",
133
- " <td>75045854.0</td>\n",
134
  " <td>TSLA</td>\n",
135
  " </tr>\n",
136
  " <tr>\n",
137
  " <th>2</th>\n",
138
- " <td>2024-05-06</td>\n",
139
- " <td>183.80</td>\n",
140
- " <td>187.5600</td>\n",
141
- " <td>182.20</td>\n",
142
- " <td>184.76</td>\n",
143
- " <td>84390253.0</td>\n",
144
  " <td>TSLA</td>\n",
145
  " </tr>\n",
146
  " <tr>\n",
147
  " <th>3</th>\n",
148
- " <td>2024-05-03</td>\n",
149
- " <td>182.10</td>\n",
150
- " <td>184.7800</td>\n",
151
- " <td>178.42</td>\n",
152
- " <td>181.19</td>\n",
153
- " <td>75491539.0</td>\n",
154
  " <td>TSLA</td>\n",
155
  " </tr>\n",
156
  " <tr>\n",
157
  " <th>4</th>\n",
158
- " <td>2024-05-02</td>\n",
159
- " <td>182.86</td>\n",
160
- " <td>184.6000</td>\n",
161
- " <td>176.02</td>\n",
162
- " <td>180.01</td>\n",
163
- " <td>89148041.0</td>\n",
164
  " <td>TSLA</td>\n",
165
  " </tr>\n",
166
  " <tr>\n",
@@ -174,7 +175,7 @@
174
  " <td>...</td>\n",
175
  " </tr>\n",
176
  " <tr>\n",
177
- " <th>3484</th>\n",
178
  " <td>2010-07-06</td>\n",
179
  " <td>20.00</td>\n",
180
  " <td>20.0000</td>\n",
@@ -184,7 +185,7 @@
184
  " <td>TSLA</td>\n",
185
  " </tr>\n",
186
  " <tr>\n",
187
- " <th>3485</th>\n",
188
  " <td>2010-07-02</td>\n",
189
  " <td>23.00</td>\n",
190
  " <td>23.1000</td>\n",
@@ -194,7 +195,7 @@
194
  " <td>TSLA</td>\n",
195
  " </tr>\n",
196
  " <tr>\n",
197
- " <th>3486</th>\n",
198
  " <td>2010-07-01</td>\n",
199
  " <td>25.00</td>\n",
200
  " <td>25.9200</td>\n",
@@ -204,7 +205,7 @@
204
  " <td>TSLA</td>\n",
205
  " </tr>\n",
206
  " <tr>\n",
207
- " <th>3487</th>\n",
208
  " <td>2010-06-30</td>\n",
209
  " <td>25.79</td>\n",
210
  " <td>30.4192</td>\n",
@@ -214,7 +215,7 @@
214
  " <td>TSLA</td>\n",
215
  " </tr>\n",
216
  " <tr>\n",
217
- " <th>3488</th>\n",
218
  " <td>2010-06-29</td>\n",
219
  " <td>19.00</td>\n",
220
  " <td>25.0000</td>\n",
@@ -225,27 +226,27 @@
225
  " </tr>\n",
226
  " </tbody>\n",
227
  "</table>\n",
228
- "<p>3489 rows × 7 columns</p>\n",
229
  "</div>"
230
  ],
231
  "text/plain": [
232
  " date 1. open 2. high 3. low 4. close 5. volume ticker\n",
233
- "0 2024-05-08 171.59 176.0600 170.15 174.72 79969488.0 TSLA\n",
234
- "1 2024-05-07 182.40 183.2600 177.40 177.81 75045854.0 TSLA\n",
235
- "2 2024-05-06 183.80 187.5600 182.20 184.76 84390253.0 TSLA\n",
236
- "3 2024-05-03 182.10 184.7800 178.42 181.19 75491539.0 TSLA\n",
237
- "4 2024-05-02 182.86 184.6000 176.02 180.01 89148041.0 TSLA\n",
238
  "... ... ... ... ... ... ... ...\n",
239
- "3484 2010-07-06 20.00 20.0000 15.83 16.11 6866900.0 TSLA\n",
240
- "3485 2010-07-02 23.00 23.1000 18.71 19.20 5139800.0 TSLA\n",
241
- "3486 2010-07-01 25.00 25.9200 20.27 21.96 8218800.0 TSLA\n",
242
- "3487 2010-06-30 25.79 30.4192 23.30 23.83 17187100.0 TSLA\n",
243
- "3488 2010-06-29 19.00 25.0000 17.54 23.89 18766300.0 TSLA\n",
244
  "\n",
245
- "[3489 rows x 7 columns]"
246
  ]
247
  },
248
- "execution_count": 4,
249
  "metadata": {},
250
  "output_type": "execute_result"
251
  }
@@ -256,7 +257,7 @@
256
  },
257
  {
258
  "cell_type": "code",
259
- "execution_count": 5,
260
  "metadata": {},
261
  "outputs": [
262
  {
@@ -275,7 +276,7 @@
275
  },
276
  {
277
  "cell_type": "code",
278
- "execution_count": 6,
279
  "metadata": {},
280
  "outputs": [],
281
  "source": [
@@ -285,14 +286,14 @@
285
  },
286
  {
287
  "cell_type": "code",
288
- "execution_count": 7,
289
  "metadata": {},
290
  "outputs": [
291
  {
292
  "name": "stdout",
293
  "output_type": "stream",
294
  "text": [
295
- "2024-05-09 13:46:25,296 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n",
296
  "\n"
297
  ]
298
  }
@@ -302,7 +303,7 @@
302
  "tesla_fg = fs.get_or_create_feature_group(\n",
303
  " name=\"tesla_stock\",\n",
304
  " description=\"Tesla stock dataset from alpha vantage\",\n",
305
- " version=3,\n",
306
  " primary_key=[\"ticker\"],\n",
307
  " event_time=['date'],\n",
308
  " online_enabled=False,\n",
@@ -311,7 +312,7 @@
311
  },
312
  {
313
  "cell_type": "code",
314
- "execution_count": 8,
315
  "metadata": {},
316
  "outputs": [
317
  {
@@ -319,18 +320,18 @@
319
  "output_type": "stream",
320
  "text": [
321
  "Feature Group created successfully, explore it at \n",
322
- "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/797023\n"
323
  ]
324
  },
325
  {
326
  "data": {
327
  "application/vnd.jupyter.widget-view+json": {
328
- "model_id": "4ea750e675d14c1b8f6bd5b47dbc70cf",
329
  "version_major": 2,
330
  "version_minor": 0
331
  },
332
  "text/plain": [
333
- "Uploading Dataframe: 0.00% | | Rows 0/3489 | Elapsed Time: 00:00 | Remaining Time: ?"
334
  ]
335
  },
336
  "metadata": {},
@@ -340,18 +341,18 @@
340
  "name": "stdout",
341
  "output_type": "stream",
342
  "text": [
343
- "Launching job: tesla_stock_3_offline_fg_materialization\n",
344
  "Job started successfully, you can follow the progress at \n",
345
- "https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stock_3_offline_fg_materialization/executions\n"
346
  ]
347
  },
348
  {
349
  "data": {
350
  "text/plain": [
351
- "(<hsfs.core.job.Job at 0x1eb9c874450>, None)"
352
  ]
353
  },
354
- "execution_count": 8,
355
  "metadata": {},
356
  "output_type": "execute_result"
357
  }
@@ -363,7 +364,7 @@
363
  },
364
  {
365
  "cell_type": "code",
366
- "execution_count": 9,
367
  "metadata": {},
368
  "outputs": [],
369
  "source": [
@@ -373,7 +374,7 @@
373
  },
374
  {
375
  "cell_type": "code",
376
- "execution_count": 10,
377
  "metadata": {},
378
  "outputs": [],
379
  "source": [
@@ -383,7 +384,7 @@
383
  },
384
  {
385
  "cell_type": "code",
386
- "execution_count": 11,
387
  "metadata": {},
388
  "outputs": [],
389
  "source": [
@@ -393,14 +394,14 @@
393
  },
394
  {
395
  "cell_type": "code",
396
- "execution_count": 12,
397
  "metadata": {},
398
  "outputs": [
399
  {
400
  "name": "stdout",
401
  "output_type": "stream",
402
  "text": [
403
- "2024-05-09 13:55:13,441 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n",
404
  "\n"
405
  ]
406
  }
@@ -410,7 +411,7 @@
410
  "news_sentiment_fg = fs.get_or_create_feature_group(\n",
411
  " name='news_sentiment_updated',\n",
412
  " description='News sentiment from Polygon',\n",
413
- " version=3,\n",
414
  " primary_key=['ticker'],\n",
415
  " event_time=['date'],\n",
416
  " online_enabled=False,\n",
@@ -419,7 +420,7 @@
419
  },
420
  {
421
  "cell_type": "code",
422
- "execution_count": 13,
423
  "metadata": {},
424
  "outputs": [
425
  {
@@ -427,18 +428,18 @@
427
  "output_type": "stream",
428
  "text": [
429
  "Feature Group created successfully, explore it at \n",
430
- "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/798028\n"
431
  ]
432
  },
433
  {
434
  "data": {
435
  "application/vnd.jupyter.widget-view+json": {
436
- "model_id": "de6777194865498cb560957a57a04bd1",
437
  "version_major": 2,
438
  "version_minor": 0
439
  },
440
  "text/plain": [
441
- "Uploading Dataframe: 0.00% | | Rows 0/75 | Elapsed Time: 00:00 | Remaining Time: ?"
442
  ]
443
  },
444
  "metadata": {},
@@ -448,18 +449,18 @@
448
  "name": "stdout",
449
  "output_type": "stream",
450
  "text": [
451
- "Launching job: news_sentiment_updated_3_offline_fg_materialization\n",
452
  "Job started successfully, you can follow the progress at \n",
453
- "https://c.app.hopsworks.ai/p/693399/jobs/named/news_sentiment_updated_3_offline_fg_materialization/executions\n"
454
  ]
455
  },
456
  {
457
  "data": {
458
  "text/plain": [
459
- "(<hsfs.core.job.Job at 0x1eb9c877fd0>, None)"
460
  ]
461
  },
462
- "execution_count": 13,
463
  "metadata": {},
464
  "output_type": "execute_result"
465
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 14,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "Connection closed.\n",
13
  "Connected. Call `.close()` to terminate connection gracefully.\n",
14
  "\n",
15
  "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
 
43
  },
44
  {
45
  "cell_type": "code",
46
+ "execution_count": 15,
47
  "metadata": {},
48
  "outputs": [
49
  {
50
  "name": "stdout",
51
  "output_type": "stream",
52
  "text": [
53
+ " date 1. open 2. high 3. low 4. close 5. volume ticker\n",
54
+ "0 2024-05-13 170.00 175.4000 169.00 171.89 67018903.0 TSLA\n",
55
+ "1 2024-05-10 173.05 173.0599 167.75 168.47 72627178.0 TSLA\n",
56
+ "2 2024-05-09 175.01 175.6200 171.37 171.97 65950292.0 TSLA\n",
57
+ "3 2024-05-08 171.59 176.0600 170.15 174.72 79969488.0 TSLA\n",
58
+ "4 2024-05-07 182.40 183.2600 177.40 177.81 75045854.0 TSLA\n"
59
  ]
60
  }
61
  ],
 
67
  },
68
  {
69
  "cell_type": "code",
70
+ "execution_count": 16,
71
  "metadata": {},
72
  "outputs": [],
73
  "source": [
 
80
  },
81
  {
82
  "cell_type": "code",
83
+ "execution_count": 17,
84
  "metadata": {},
85
  "outputs": [
86
  {
 
116
  " <tbody>\n",
117
  " <tr>\n",
118
  " <th>0</th>\n",
119
+ " <td>2024-05-13</td>\n",
120
+ " <td>170.00</td>\n",
121
+ " <td>175.4000</td>\n",
122
+ " <td>169.00</td>\n",
123
+ " <td>171.89</td>\n",
124
+ " <td>67018903.0</td>\n",
125
  " <td>TSLA</td>\n",
126
  " </tr>\n",
127
  " <tr>\n",
128
  " <th>1</th>\n",
129
+ " <td>2024-05-10</td>\n",
130
+ " <td>173.05</td>\n",
131
+ " <td>173.0599</td>\n",
132
+ " <td>167.75</td>\n",
133
+ " <td>168.47</td>\n",
134
+ " <td>72627178.0</td>\n",
135
  " <td>TSLA</td>\n",
136
  " </tr>\n",
137
  " <tr>\n",
138
  " <th>2</th>\n",
139
+ " <td>2024-05-09</td>\n",
140
+ " <td>175.01</td>\n",
141
+ " <td>175.6200</td>\n",
142
+ " <td>171.37</td>\n",
143
+ " <td>171.97</td>\n",
144
+ " <td>65950292.0</td>\n",
145
  " <td>TSLA</td>\n",
146
  " </tr>\n",
147
  " <tr>\n",
148
  " <th>3</th>\n",
149
+ " <td>2024-05-08</td>\n",
150
+ " <td>171.59</td>\n",
151
+ " <td>176.0600</td>\n",
152
+ " <td>170.15</td>\n",
153
+ " <td>174.72</td>\n",
154
+ " <td>79969488.0</td>\n",
155
  " <td>TSLA</td>\n",
156
  " </tr>\n",
157
  " <tr>\n",
158
  " <th>4</th>\n",
159
+ " <td>2024-05-07</td>\n",
160
+ " <td>182.40</td>\n",
161
+ " <td>183.2600</td>\n",
162
+ " <td>177.40</td>\n",
163
+ " <td>177.81</td>\n",
164
+ " <td>75045854.0</td>\n",
165
  " <td>TSLA</td>\n",
166
  " </tr>\n",
167
  " <tr>\n",
 
175
  " <td>...</td>\n",
176
  " </tr>\n",
177
  " <tr>\n",
178
+ " <th>3487</th>\n",
179
  " <td>2010-07-06</td>\n",
180
  " <td>20.00</td>\n",
181
  " <td>20.0000</td>\n",
 
185
  " <td>TSLA</td>\n",
186
  " </tr>\n",
187
  " <tr>\n",
188
+ " <th>3488</th>\n",
189
  " <td>2010-07-02</td>\n",
190
  " <td>23.00</td>\n",
191
  " <td>23.1000</td>\n",
 
195
  " <td>TSLA</td>\n",
196
  " </tr>\n",
197
  " <tr>\n",
198
+ " <th>3489</th>\n",
199
  " <td>2010-07-01</td>\n",
200
  " <td>25.00</td>\n",
201
  " <td>25.9200</td>\n",
 
205
  " <td>TSLA</td>\n",
206
  " </tr>\n",
207
  " <tr>\n",
208
+ " <th>3490</th>\n",
209
  " <td>2010-06-30</td>\n",
210
  " <td>25.79</td>\n",
211
  " <td>30.4192</td>\n",
 
215
  " <td>TSLA</td>\n",
216
  " </tr>\n",
217
  " <tr>\n",
218
+ " <th>3491</th>\n",
219
  " <td>2010-06-29</td>\n",
220
  " <td>19.00</td>\n",
221
  " <td>25.0000</td>\n",
 
226
  " </tr>\n",
227
  " </tbody>\n",
228
  "</table>\n",
229
+ "<p>3492 rows × 7 columns</p>\n",
230
  "</div>"
231
  ],
232
  "text/plain": [
233
  " date 1. open 2. high 3. low 4. close 5. volume ticker\n",
234
+ "0 2024-05-13 170.00 175.4000 169.00 171.89 67018903.0 TSLA\n",
235
+ "1 2024-05-10 173.05 173.0599 167.75 168.47 72627178.0 TSLA\n",
236
+ "2 2024-05-09 175.01 175.6200 171.37 171.97 65950292.0 TSLA\n",
237
+ "3 2024-05-08 171.59 176.0600 170.15 174.72 79969488.0 TSLA\n",
238
+ "4 2024-05-07 182.40 183.2600 177.40 177.81 75045854.0 TSLA\n",
239
  "... ... ... ... ... ... ... ...\n",
240
+ "3487 2010-07-06 20.00 20.0000 15.83 16.11 6866900.0 TSLA\n",
241
+ "3488 2010-07-02 23.00 23.1000 18.71 19.20 5139800.0 TSLA\n",
242
+ "3489 2010-07-01 25.00 25.9200 20.27 21.96 8218800.0 TSLA\n",
243
+ "3490 2010-06-30 25.79 30.4192 23.30 23.83 17187100.0 TSLA\n",
244
+ "3491 2010-06-29 19.00 25.0000 17.54 23.89 18766300.0 TSLA\n",
245
  "\n",
246
+ "[3492 rows x 7 columns]"
247
  ]
248
  },
249
+ "execution_count": 17,
250
  "metadata": {},
251
  "output_type": "execute_result"
252
  }
 
257
  },
258
  {
259
  "cell_type": "code",
260
+ "execution_count": 18,
261
  "metadata": {},
262
  "outputs": [
263
  {
 
276
  },
277
  {
278
  "cell_type": "code",
279
+ "execution_count": 19,
280
  "metadata": {},
281
  "outputs": [],
282
  "source": [
 
286
  },
287
  {
288
  "cell_type": "code",
289
+ "execution_count": 20,
290
  "metadata": {},
291
  "outputs": [
292
  {
293
  "name": "stdout",
294
  "output_type": "stream",
295
  "text": [
296
+ "2024-05-14 12:11:21,958 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n",
297
  "\n"
298
  ]
299
  }
 
303
  "tesla_fg = fs.get_or_create_feature_group(\n",
304
  " name=\"tesla_stock\",\n",
305
  " description=\"Tesla stock dataset from alpha vantage\",\n",
306
+ " version=5,\n",
307
  " primary_key=[\"ticker\"],\n",
308
  " event_time=['date'],\n",
309
  " online_enabled=False,\n",
 
312
  },
313
  {
314
  "cell_type": "code",
315
+ "execution_count": 21,
316
  "metadata": {},
317
  "outputs": [
318
  {
 
320
  "output_type": "stream",
321
  "text": [
322
  "Feature Group created successfully, explore it at \n",
323
+ "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/813389\n"
324
  ]
325
  },
326
  {
327
  "data": {
328
  "application/vnd.jupyter.widget-view+json": {
329
+ "model_id": "b2adcace7aae475bbeb131bfc4b787ae",
330
  "version_major": 2,
331
  "version_minor": 0
332
  },
333
  "text/plain": [
334
+ "Uploading Dataframe: 0.00% | | Rows 0/3492 | Elapsed Time: 00:00 | Remaining Time: ?"
335
  ]
336
  },
337
  "metadata": {},
 
341
  "name": "stdout",
342
  "output_type": "stream",
343
  "text": [
344
+ "Launching job: tesla_stock_5_offline_fg_materialization\n",
345
  "Job started successfully, you can follow the progress at \n",
346
+ "https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stock_5_offline_fg_materialization/executions\n"
347
  ]
348
  },
349
  {
350
  "data": {
351
  "text/plain": [
352
+ "(<hsfs.core.job.Job at 0x194d7d8bc50>, None)"
353
  ]
354
  },
355
+ "execution_count": 21,
356
  "metadata": {},
357
  "output_type": "execute_result"
358
  }
 
364
  },
365
  {
366
  "cell_type": "code",
367
+ "execution_count": 22,
368
  "metadata": {},
369
  "outputs": [],
370
  "source": [
 
374
  },
375
  {
376
  "cell_type": "code",
377
+ "execution_count": 23,
378
  "metadata": {},
379
  "outputs": [],
380
  "source": [
 
384
  },
385
  {
386
  "cell_type": "code",
387
+ "execution_count": 24,
388
  "metadata": {},
389
  "outputs": [],
390
  "source": [
 
394
  },
395
  {
396
  "cell_type": "code",
397
+ "execution_count": 25,
398
  "metadata": {},
399
  "outputs": [
400
  {
401
  "name": "stdout",
402
  "output_type": "stream",
403
  "text": [
404
+ "2024-05-14 12:11:37,246 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n",
405
  "\n"
406
  ]
407
  }
 
411
  "news_sentiment_fg = fs.get_or_create_feature_group(\n",
412
  " name='news_sentiment_updated',\n",
413
  " description='News sentiment from Polygon',\n",
414
+ " version=5,\n",
415
  " primary_key=['ticker'],\n",
416
  " event_time=['date'],\n",
417
  " online_enabled=False,\n",
 
420
  },
421
  {
422
  "cell_type": "code",
423
+ "execution_count": 26,
424
  "metadata": {},
425
  "outputs": [
426
  {
 
428
  "output_type": "stream",
429
  "text": [
430
  "Feature Group created successfully, explore it at \n",
431
+ "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/814412\n"
432
  ]
433
  },
434
  {
435
  "data": {
436
  "application/vnd.jupyter.widget-view+json": {
437
+ "model_id": "7ab9ec4276f44596ab0118a2ff53dbdc",
438
  "version_major": 2,
439
  "version_minor": 0
440
  },
441
  "text/plain": [
442
+ "Uploading Dataframe: 0.00% | | Rows 0/74 | Elapsed Time: 00:00 | Remaining Time: ?"
443
  ]
444
  },
445
  "metadata": {},
 
449
  "name": "stdout",
450
  "output_type": "stream",
451
  "text": [
452
+ "Launching job: news_sentiment_updated_5_offline_fg_materialization\n",
453
  "Job started successfully, you can follow the progress at \n",
454
+ "https://c.app.hopsworks.ai/p/693399/jobs/named/news_sentiment_updated_5_offline_fg_materialization/executions\n"
455
  ]
456
  },
457
  {
458
  "data": {
459
  "text/plain": [
460
+ "(<hsfs.core.job.Job at 0x194d7dab290>, None)"
461
  ]
462
  },
463
+ "execution_count": 26,
464
  "metadata": {},
465
  "output_type": "execute_result"
466
  }
Stocks news prediction/Notebooks/6_feature_view.ipynb CHANGED
@@ -5,29 +5,55 @@
5
  "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  {
9
  "data": {
10
  "application/vnd.jupyter.widget-view+json": {
11
- "model_id": "c9ba7f1115364637b09faef28c9fd7b4",
12
  "version_major": 2,
13
  "version_minor": 0
14
  },
15
  "text/plain": [
16
- "Uploading Dataframe: 0.00% | | Rows 0/3489 | Elapsed Time: 00:00 | Remaining Time: ?"
17
  ]
18
  },
19
  "metadata": {},
20
  "output_type": "display_data"
21
  },
 
 
 
 
 
 
 
 
 
22
  {
23
  "data": {
24
  "application/vnd.jupyter.widget-view+json": {
25
- "model_id": "5219f2e7391942b0a80d935fb1c03cd8",
26
  "version_major": 2,
27
  "version_minor": 0
28
  },
29
  "text/plain": [
30
- "Uploading Dataframe: 0.00% | | Rows 0/75 | Elapsed Time: 00:00 | Remaining Time: ?"
31
  ]
32
  },
33
  "metadata": {},
@@ -37,6 +63,9 @@
37
  "name": "stdout",
38
  "output_type": "stream",
39
  "text": [
 
 
 
40
  "Connection closed.\n",
41
  "Connected. Call `.close()` to terminate connection gracefully.\n",
42
  "\n",
@@ -78,8 +107,8 @@
78
  "def create_stocks_feature_view(fs, version):\n",
79
  "\n",
80
  " # Loading in the feature groups\n",
81
- " tesla_fg = fs.get_feature_group('tesla_stock', version=3)\n",
82
- " news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=3)\n",
83
  "\n",
84
  " # Defining the query\n",
85
  " ds_query = tesla_fg.select(['date', 'open', 'ticker'])\\\n",
@@ -105,17 +134,17 @@
105
  "output_type": "stream",
106
  "text": [
107
  "Feature view created successfully, explore it at \n",
108
- "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fv/tesla_stocks_fv/version/3\n"
109
  ]
110
  }
111
  ],
112
  "source": [
113
  "#Creating the feature view\n",
114
  "try:\n",
115
- " feature_view = fs.get_feature_view(\"tesla_stocks_fv\", version=3)\n",
116
- " tesla_fg = fs.get_feature_group('tesla_stock', version=3)\n",
117
  "except:\n",
118
- " feature_view, tesla_fg = create_stocks_feature_view(fs, 3)"
119
  ]
120
  },
121
  {
 
5
  "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "Connected. Call `.close()` to terminate connection gracefully.\n",
13
+ "\n",
14
+ "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
15
+ "Connected. Call `.close()` to terminate connection gracefully.\n",
16
+ " date 1. open 2. high 3. low 4. close 5. volume ticker\n",
17
+ "0 2024-05-13 170.00 175.4000 169.00 171.89 67018903.0 TSLA\n",
18
+ "1 2024-05-10 173.05 173.0599 167.75 168.47 72627178.0 TSLA\n",
19
+ "2 2024-05-09 175.01 175.6200 171.37 171.97 65950292.0 TSLA\n",
20
+ "3 2024-05-08 171.59 176.0600 170.15 174.72 79969488.0 TSLA\n",
21
+ "4 2024-05-07 182.40 183.2600 177.40 177.81 75045854.0 TSLA\n",
22
+ "Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')\n"
23
+ ]
24
+ },
25
  {
26
  "data": {
27
  "application/vnd.jupyter.widget-view+json": {
28
+ "model_id": "aea133b66b924b1d9e2f35592658cc73",
29
  "version_major": 2,
30
  "version_minor": 0
31
  },
32
  "text/plain": [
33
+ "Uploading Dataframe: 0.00% | | Rows 0/3492 | Elapsed Time: 00:00 | Remaining Time: ?"
34
  ]
35
  },
36
  "metadata": {},
37
  "output_type": "display_data"
38
  },
39
+ {
40
+ "name": "stdout",
41
+ "output_type": "stream",
42
+ "text": [
43
+ "Launching job: tesla_stock_1_offline_fg_materialization\n",
44
+ "Job started successfully, you can follow the progress at \n",
45
+ "https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stock_1_offline_fg_materialization/executions\n"
46
+ ]
47
+ },
48
  {
49
  "data": {
50
  "application/vnd.jupyter.widget-view+json": {
51
+ "model_id": "b84d15d1c321483fb29e66b310fd95e2",
52
  "version_major": 2,
53
  "version_minor": 0
54
  },
55
  "text/plain": [
56
+ "Uploading Dataframe: 0.00% | | Rows 0/74 | Elapsed Time: 00:00 | Remaining Time: ?"
57
  ]
58
  },
59
  "metadata": {},
 
63
  "name": "stdout",
64
  "output_type": "stream",
65
  "text": [
66
+ "Launching job: news_sentiment_updated_1_offline_fg_materialization\n",
67
+ "Job started successfully, you can follow the progress at \n",
68
+ "https://c.app.hopsworks.ai/p/693399/jobs/named/news_sentiment_updated_1_offline_fg_materialization/executions\n",
69
  "Connection closed.\n",
70
  "Connected. Call `.close()` to terminate connection gracefully.\n",
71
  "\n",
 
107
  "def create_stocks_feature_view(fs, version):\n",
108
  "\n",
109
  " # Loading in the feature groups\n",
110
+ " tesla_fg = fs.get_feature_group('tesla_stock', version=5)\n",
111
+ " news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=5)\n",
112
  "\n",
113
  " # Defining the query\n",
114
  " ds_query = tesla_fg.select(['date', 'open', 'ticker'])\\\n",
 
134
  "output_type": "stream",
135
  "text": [
136
  "Feature view created successfully, explore it at \n",
137
+ "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fv/tesla_stocks_fv/version/5\n"
138
  ]
139
  }
140
  ],
141
  "source": [
142
  "#Creating the feature view\n",
143
  "try:\n",
144
+ " feature_view = fs.get_feature_view(\"tesla_stocks_fv\", version=5)\n",
145
+ " tesla_fg = fs.get_feature_group('tesla_stock', version=5)\n",
146
  "except:\n",
147
+ " feature_view, tesla_fg = create_stocks_feature_view(fs, 5)"
148
  ]
149
  },
150
  {
Stocks news prediction/Notebooks/7_training_pipeline.ipynb CHANGED
@@ -2,14 +2,21 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "Connected. Call `.close()` to terminate connection gracefully.\n",
 
 
 
 
 
 
 
13
  "\n",
14
  "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
15
  "Connected. Call `.close()` to terminate connection gracefully.\n",
@@ -50,20 +57,20 @@
50
  },
51
  {
52
  "cell_type": "code",
53
- "execution_count": 2,
54
  "metadata": {},
55
  "outputs": [],
56
  "source": [
57
  "#Getting the feature view\n",
58
  "feature_view = fs.get_feature_view(\n",
59
  " name='tesla_stocks_fv',\n",
60
- " version=3\n",
61
  ")"
62
  ]
63
  },
64
  {
65
  "cell_type": "code",
66
- "execution_count": 3,
67
  "metadata": {},
68
  "outputs": [],
69
  "source": [
@@ -77,7 +84,7 @@
77
  },
78
  {
79
  "cell_type": "code",
80
- "execution_count": 4,
81
  "metadata": {},
82
  "outputs": [
83
  {
@@ -85,18 +92,18 @@
85
  "output_type": "stream",
86
  "text": [
87
  "Training dataset job started successfully, you can follow the progress at \n",
88
- "https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stocks_fv_3_create_fv_td_09052024135613/executions\n",
89
- "2024-05-09 15:57:26,415 WARNING: VersionWarning: Incremented version to `3`.\n",
90
  "\n"
91
  ]
92
  },
93
  {
94
  "data": {
95
  "text/plain": [
96
- "(3, <hsfs.core.job.Job at 0x2c40676f650>)"
97
  ]
98
  },
99
- "execution_count": 4,
100
  "metadata": {},
101
  "output_type": "execute_result"
102
  }
@@ -115,17 +122,17 @@
115
  },
116
  {
117
  "cell_type": "code",
118
- "execution_count": 5,
119
  "metadata": {},
120
  "outputs": [],
121
  "source": [
122
  "#Collecting the split from feature view\n",
123
- "X_train, X_test, y_train, y_test = feature_view.get_train_test_split(3)"
124
  ]
125
  },
126
  {
127
  "cell_type": "code",
128
- "execution_count": 6,
129
  "metadata": {},
130
  "outputs": [
131
  {
@@ -159,31 +166,31 @@
159
  " <th>0</th>\n",
160
  " <td>2022-12-14T00:00:00.000Z</td>\n",
161
  " <td>TSLA</td>\n",
162
- " <td>0.046415</td>\n",
163
  " </tr>\n",
164
  " <tr>\n",
165
  " <th>1</th>\n",
166
  " <td>2023-02-21T00:00:00.000Z</td>\n",
167
  " <td>TSLA</td>\n",
168
- " <td>0.096789</td>\n",
169
  " </tr>\n",
170
  " <tr>\n",
171
  " <th>2</th>\n",
172
  " <td>2023-08-17T00:00:00.000Z</td>\n",
173
  " <td>TSLA</td>\n",
174
- " <td>0.145765</td>\n",
175
  " </tr>\n",
176
  " <tr>\n",
177
  " <th>3</th>\n",
178
  " <td>2022-09-16T00:00:00.000Z</td>\n",
179
  " <td>TSLA</td>\n",
180
- " <td>0.094337</td>\n",
181
  " </tr>\n",
182
  " <tr>\n",
183
  " <th>4</th>\n",
184
  " <td>2023-08-28T00:00:00.000Z</td>\n",
185
  " <td>TSLA</td>\n",
186
- " <td>0.145765</td>\n",
187
  " </tr>\n",
188
  " <tr>\n",
189
  " <th>...</th>\n",
@@ -192,58 +199,58 @@
192
  " <td>...</td>\n",
193
  " </tr>\n",
194
  " <tr>\n",
195
- " <th>377</th>\n",
196
  " <td>2023-02-10T00:00:00.000Z</td>\n",
197
  " <td>TSLA</td>\n",
198
- " <td>0.096789</td>\n",
199
  " </tr>\n",
200
  " <tr>\n",
201
- " <th>378</th>\n",
202
  " <td>2023-05-08T00:00:00.000Z</td>\n",
203
  " <td>TSLA</td>\n",
204
- " <td>-0.006389</td>\n",
205
  " </tr>\n",
206
  " <tr>\n",
207
- " <th>379</th>\n",
208
  " <td>2022-09-08T00:00:00.000Z</td>\n",
209
  " <td>TSLA</td>\n",
210
- " <td>0.094337</td>\n",
211
  " </tr>\n",
212
  " <tr>\n",
213
- " <th>380</th>\n",
214
  " <td>2023-07-06T00:00:00.000Z</td>\n",
215
  " <td>TSLA</td>\n",
216
- " <td>0.174215</td>\n",
217
  " </tr>\n",
218
  " <tr>\n",
219
- " <th>381</th>\n",
220
  " <td>2023-10-27T00:00:00.000Z</td>\n",
221
  " <td>TSLA</td>\n",
222
- " <td>0.031260</td>\n",
223
  " </tr>\n",
224
  " </tbody>\n",
225
  "</table>\n",
226
- "<p>382 rows × 3 columns</p>\n",
227
  "</div>"
228
  ],
229
  "text/plain": [
230
  " date ticker sentiment\n",
231
- "0 2022-12-14T00:00:00.000Z TSLA 0.046415\n",
232
- "1 2023-02-21T00:00:00.000Z TSLA 0.096789\n",
233
- "2 2023-08-17T00:00:00.000Z TSLA 0.145765\n",
234
- "3 2022-09-16T00:00:00.000Z TSLA 0.094337\n",
235
- "4 2023-08-28T00:00:00.000Z TSLA 0.145765\n",
236
  ".. ... ... ...\n",
237
- "377 2023-02-10T00:00:00.000Z TSLA 0.096789\n",
238
- "378 2023-05-08T00:00:00.000Z TSLA -0.006389\n",
239
- "379 2022-09-08T00:00:00.000Z TSLA 0.094337\n",
240
- "380 2023-07-06T00:00:00.000Z TSLA 0.174215\n",
241
- "381 2023-10-27T00:00:00.000Z TSLA 0.031260\n",
242
  "\n",
243
- "[382 rows x 3 columns]"
244
  ]
245
  },
246
- "execution_count": 6,
247
  "metadata": {},
248
  "output_type": "execute_result"
249
  }
@@ -255,7 +262,7 @@
255
  },
256
  {
257
  "cell_type": "code",
258
- "execution_count": 7,
259
  "metadata": {},
260
  "outputs": [],
261
  "source": [
@@ -268,7 +275,7 @@
268
  },
269
  {
270
  "cell_type": "code",
271
- "execution_count": 8,
272
  "metadata": {},
273
  "outputs": [
274
  {
@@ -302,31 +309,31 @@
302
  " <th>0</th>\n",
303
  " <td>2022-12-14</td>\n",
304
  " <td>TSLA</td>\n",
305
- " <td>0.046415</td>\n",
306
  " </tr>\n",
307
  " <tr>\n",
308
  " <th>1</th>\n",
309
  " <td>2023-02-21</td>\n",
310
  " <td>TSLA</td>\n",
311
- " <td>0.096789</td>\n",
312
  " </tr>\n",
313
  " <tr>\n",
314
  " <th>2</th>\n",
315
  " <td>2023-08-17</td>\n",
316
  " <td>TSLA</td>\n",
317
- " <td>0.145765</td>\n",
318
  " </tr>\n",
319
  " <tr>\n",
320
  " <th>3</th>\n",
321
  " <td>2022-09-16</td>\n",
322
  " <td>TSLA</td>\n",
323
- " <td>0.094337</td>\n",
324
  " </tr>\n",
325
  " <tr>\n",
326
  " <th>4</th>\n",
327
  " <td>2023-08-28</td>\n",
328
  " <td>TSLA</td>\n",
329
- " <td>0.145765</td>\n",
330
  " </tr>\n",
331
  " </tbody>\n",
332
  "</table>\n",
@@ -334,14 +341,14 @@
334
  ],
335
  "text/plain": [
336
  " date ticker sentiment\n",
337
- "0 2022-12-14 TSLA 0.046415\n",
338
- "1 2023-02-21 TSLA 0.096789\n",
339
- "2 2023-08-17 TSLA 0.145765\n",
340
- "3 2022-09-16 TSLA 0.094337\n",
341
- "4 2023-08-28 TSLA 0.145765"
342
  ]
343
  },
344
- "execution_count": 8,
345
  "metadata": {},
346
  "output_type": "execute_result"
347
  }
@@ -352,7 +359,7 @@
352
  },
353
  {
354
  "cell_type": "code",
355
- "execution_count": 9,
356
  "metadata": {},
357
  "outputs": [],
358
  "source": [
@@ -377,7 +384,7 @@
377
  },
378
  {
379
  "cell_type": "code",
380
- "execution_count": 10,
381
  "metadata": {},
382
  "outputs": [
383
  {
@@ -410,31 +417,31 @@
410
  " <tr>\n",
411
  " <th>0</th>\n",
412
  " <td>2022-12-14</td>\n",
413
- " <td>0.046415</td>\n",
414
  " <td>1.0</td>\n",
415
  " </tr>\n",
416
  " <tr>\n",
417
  " <th>1</th>\n",
418
  " <td>2023-02-21</td>\n",
419
- " <td>0.096789</td>\n",
420
  " <td>1.0</td>\n",
421
  " </tr>\n",
422
  " <tr>\n",
423
  " <th>2</th>\n",
424
  " <td>2023-08-17</td>\n",
425
- " <td>0.145765</td>\n",
426
  " <td>1.0</td>\n",
427
  " </tr>\n",
428
  " <tr>\n",
429
  " <th>3</th>\n",
430
  " <td>2022-09-16</td>\n",
431
- " <td>0.094337</td>\n",
432
  " <td>1.0</td>\n",
433
  " </tr>\n",
434
  " <tr>\n",
435
  " <th>4</th>\n",
436
  " <td>2023-08-28</td>\n",
437
- " <td>0.145765</td>\n",
438
  " <td>1.0</td>\n",
439
  " </tr>\n",
440
  " </tbody>\n",
@@ -443,14 +450,14 @@
443
  ],
444
  "text/plain": [
445
  " date sentiment ticker_TSLA\n",
446
- "0 2022-12-14 0.046415 1.0\n",
447
- "1 2023-02-21 0.096789 1.0\n",
448
- "2 2023-08-17 0.145765 1.0\n",
449
- "3 2022-09-16 0.094337 1.0\n",
450
- "4 2023-08-28 0.145765 1.0"
451
  ]
452
  },
453
- "execution_count": 10,
454
  "metadata": {},
455
  "output_type": "execute_result"
456
  }
@@ -462,7 +469,7 @@
462
  },
463
  {
464
  "cell_type": "code",
465
- "execution_count": 11,
466
  "metadata": {},
467
  "outputs": [],
468
  "source": [
@@ -488,7 +495,7 @@
488
  },
489
  {
490
  "cell_type": "code",
491
- "execution_count": 12,
492
  "metadata": {},
493
  "outputs": [],
494
  "source": [
@@ -496,24 +503,24 @@
496
  "scaler = MinMaxScaler()\n",
497
  "\n",
498
  "# Fitting and transforming the 'open' column\n",
499
- "y_train['open_scaled'] = scaler.fit_transform(y_train[['open']])\n",
500
- "y_train.drop('open', axis=1, inplace=True)"
501
  ]
502
  },
503
  {
504
  "cell_type": "code",
505
- "execution_count": 13,
506
  "metadata": {},
507
  "outputs": [],
508
  "source": [
509
  "#Doing the same to y_test as done to y_train \n",
510
- "y_test['open_scaled'] = scaler.fit_transform(y_test[['open']])\n",
511
- "y_test.drop('open', axis=1, inplace=True)"
512
  ]
513
  },
514
  {
515
  "cell_type": "code",
516
- "execution_count": 14,
517
  "metadata": {},
518
  "outputs": [],
519
  "source": [
@@ -560,14 +567,14 @@
560
  },
561
  {
562
  "cell_type": "code",
563
- "execution_count": 15,
564
  "metadata": {},
565
  "outputs": [
566
  {
567
  "name": "stdout",
568
  "output_type": "stream",
569
  "text": [
570
- "2024-05-09 15:57:31,170 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
571
  "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
572
  "\n"
573
  ]
@@ -591,7 +598,7 @@
591
  },
592
  {
593
  "cell_type": "code",
594
- "execution_count": 16,
595
  "metadata": {},
596
  "outputs": [],
597
  "source": [
@@ -610,23 +617,23 @@
610
  },
611
  {
612
  "cell_type": "code",
613
- "execution_count": 17,
614
  "metadata": {},
615
  "outputs": [
616
  {
617
  "name": "stdout",
618
  "output_type": "stream",
619
  "text": [
620
- "\u001b[1m12/12\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 6ms/step - loss: 0.5527\n"
621
  ]
622
  },
623
  {
624
  "data": {
625
  "text/plain": [
626
- "<keras.src.callbacks.history.History at 0x2c40654db90>"
627
  ]
628
  },
629
- "execution_count": 17,
630
  "metadata": {},
631
  "output_type": "execute_result"
632
  }
@@ -638,14 +645,14 @@
638
  },
639
  {
640
  "cell_type": "code",
641
- "execution_count": 18,
642
  "metadata": {},
643
  "outputs": [
644
  {
645
  "name": "stdout",
646
  "output_type": "stream",
647
  "text": [
648
- "2024-05-09 15:57:39,537 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
649
  "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
650
  "\n"
651
  ]
@@ -669,15 +676,15 @@
669
  },
670
  {
671
  "cell_type": "code",
672
- "execution_count": 19,
673
  "metadata": {},
674
  "outputs": [
675
  {
676
  "name": "stdout",
677
  "output_type": "stream",
678
  "text": [
679
- "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 289ms/step\n",
680
- "Root Mean Squared Error (RMSE): 0.3890514762715459\n"
681
  ]
682
  }
683
  ],
@@ -696,7 +703,7 @@
696
  },
697
  {
698
  "cell_type": "code",
699
- "execution_count": 20,
700
  "metadata": {},
701
  "outputs": [
702
  {
@@ -714,16 +721,147 @@
714
  },
715
  {
716
  "cell_type": "code",
717
- "execution_count": 21,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
718
  "metadata": {},
719
  "outputs": [
720
  {
721
  "data": {
722
  "text/plain": [
723
- "{'RMSE': 0.3890514762715459}"
724
  ]
725
  },
726
- "execution_count": 21,
727
  "metadata": {},
728
  "output_type": "execute_result"
729
  }
@@ -737,7 +875,7 @@
737
  },
738
  {
739
  "cell_type": "code",
740
- "execution_count": 22,
741
  "metadata": {},
742
  "outputs": [],
743
  "source": [
@@ -749,7 +887,7 @@
749
  },
750
  {
751
  "cell_type": "code",
752
- "execution_count": 23,
753
  "metadata": {},
754
  "outputs": [],
755
  "source": [
@@ -761,7 +899,7 @@
761
  },
762
  {
763
  "cell_type": "code",
764
- "execution_count": 24,
765
  "metadata": {},
766
  "outputs": [],
767
  "source": [
@@ -778,7 +916,7 @@
778
  },
779
  {
780
  "cell_type": "code",
781
- "execution_count": 25,
782
  "metadata": {},
783
  "outputs": [
784
  {
@@ -791,7 +929,7 @@
791
  {
792
  "data": {
793
  "application/vnd.jupyter.widget-view+json": {
794
- "model_id": "1dedc19846554edfa0714bb726df6092",
795
  "version_major": 2,
796
  "version_minor": 0
797
  },
@@ -805,12 +943,12 @@
805
  {
806
  "data": {
807
  "application/vnd.jupyter.widget-view+json": {
808
- "model_id": "36e5eea5af884099995945a563169b32",
809
  "version_major": 2,
810
  "version_minor": 0
811
  },
812
  "text/plain": [
813
- "Uploading: 0.000%| | 0/291253 elapsed<00:00 remaining<?"
814
  ]
815
  },
816
  "metadata": {},
@@ -820,7 +958,7 @@
820
  "name": "stdout",
821
  "output_type": "stream",
822
  "text": [
823
- "2024-05-09 15:57:52,172 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
824
  "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
825
  "\n"
826
  ]
@@ -828,12 +966,12 @@
828
  {
829
  "data": {
830
  "application/vnd.jupyter.widget-view+json": {
831
- "model_id": "713ec1f5a8674fa0a6ef7be3fd0dd275",
832
  "version_major": 2,
833
  "version_minor": 0
834
  },
835
  "text/plain": [
836
- "Uploading: 0.000%| | 0/45 elapsed<00:00 remaining<?"
837
  ]
838
  },
839
  "metadata": {},
@@ -842,12 +980,12 @@
842
  {
843
  "data": {
844
  "application/vnd.jupyter.widget-view+json": {
845
- "model_id": "3e6a7a92d3314e5b9ab276c0d2158c70",
846
  "version_major": 2,
847
  "version_minor": 0
848
  },
849
  "text/plain": [
850
- "Uploading: 0.000%| | 0/561 elapsed<00:00 remaining<?"
851
  ]
852
  },
853
  "metadata": {},
@@ -857,7 +995,7 @@
857
  "name": "stdout",
858
  "output_type": "stream",
859
  "text": [
860
- "Model created, explore it at https://c.app.hopsworks.ai:443/p/693399/models/stock_pred_model/16\n"
861
  ]
862
  }
863
  ],
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 37,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "Connection closed.\n",
13
+ "Connected. Call `.close()` to terminate connection gracefully.\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
  "\n",
21
  "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
22
  "Connected. Call `.close()` to terminate connection gracefully.\n",
 
57
  },
58
  {
59
  "cell_type": "code",
60
+ "execution_count": 38,
61
  "metadata": {},
62
  "outputs": [],
63
  "source": [
64
  "#Getting the feature view\n",
65
  "feature_view = fs.get_feature_view(\n",
66
  " name='tesla_stocks_fv',\n",
67
+ " version=5\n",
68
  ")"
69
  ]
70
  },
71
  {
72
  "cell_type": "code",
73
+ "execution_count": 39,
74
  "metadata": {},
75
  "outputs": [],
76
  "source": [
 
84
  },
85
  {
86
  "cell_type": "code",
87
+ "execution_count": 40,
88
  "metadata": {},
89
  "outputs": [
90
  {
 
92
  "output_type": "stream",
93
  "text": [
94
  "Training dataset job started successfully, you can follow the progress at \n",
95
+ "https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stocks_fv_5_create_fv_td_14052024101636/executions\n",
96
+ "2024-05-14 12:18:32,042 WARNING: VersionWarning: Incremented version to `1`.\n",
97
  "\n"
98
  ]
99
  },
100
  {
101
  "data": {
102
  "text/plain": [
103
+ "(1, <hsfs.core.job.Job at 0x285881c0690>)"
104
  ]
105
  },
106
+ "execution_count": 40,
107
  "metadata": {},
108
  "output_type": "execute_result"
109
  }
 
122
  },
123
  {
124
  "cell_type": "code",
125
+ "execution_count": 42,
126
  "metadata": {},
127
  "outputs": [],
128
  "source": [
129
  "#Collecting the split from feature view\n",
130
+ "X_train, X_test, y_train, y_test = feature_view.get_train_test_split(1)"
131
  ]
132
  },
133
  {
134
  "cell_type": "code",
135
+ "execution_count": 43,
136
  "metadata": {},
137
  "outputs": [
138
  {
 
166
  " <th>0</th>\n",
167
  " <td>2022-12-14T00:00:00.000Z</td>\n",
168
  " <td>TSLA</td>\n",
169
+ " <td>0.091856</td>\n",
170
  " </tr>\n",
171
  " <tr>\n",
172
  " <th>1</th>\n",
173
  " <td>2023-02-21T00:00:00.000Z</td>\n",
174
  " <td>TSLA</td>\n",
175
+ " <td>0.080574</td>\n",
176
  " </tr>\n",
177
  " <tr>\n",
178
  " <th>2</th>\n",
179
  " <td>2023-08-17T00:00:00.000Z</td>\n",
180
  " <td>TSLA</td>\n",
181
+ " <td>0.214102</td>\n",
182
  " </tr>\n",
183
  " <tr>\n",
184
  " <th>3</th>\n",
185
  " <td>2022-09-16T00:00:00.000Z</td>\n",
186
  " <td>TSLA</td>\n",
187
+ " <td>0.114323</td>\n",
188
  " </tr>\n",
189
  " <tr>\n",
190
  " <th>4</th>\n",
191
  " <td>2023-08-28T00:00:00.000Z</td>\n",
192
  " <td>TSLA</td>\n",
193
+ " <td>0.214102</td>\n",
194
  " </tr>\n",
195
  " <tr>\n",
196
  " <th>...</th>\n",
 
199
  " <td>...</td>\n",
200
  " </tr>\n",
201
  " <tr>\n",
202
+ " <th>374</th>\n",
203
  " <td>2023-02-10T00:00:00.000Z</td>\n",
204
  " <td>TSLA</td>\n",
205
+ " <td>0.080574</td>\n",
206
  " </tr>\n",
207
  " <tr>\n",
208
+ " <th>375</th>\n",
209
  " <td>2023-05-08T00:00:00.000Z</td>\n",
210
  " <td>TSLA</td>\n",
211
+ " <td>0.011806</td>\n",
212
  " </tr>\n",
213
  " <tr>\n",
214
+ " <th>376</th>\n",
215
  " <td>2022-09-08T00:00:00.000Z</td>\n",
216
  " <td>TSLA</td>\n",
217
+ " <td>0.114323</td>\n",
218
  " </tr>\n",
219
  " <tr>\n",
220
+ " <th>377</th>\n",
221
  " <td>2023-07-06T00:00:00.000Z</td>\n",
222
  " <td>TSLA</td>\n",
223
+ " <td>0.150893</td>\n",
224
  " </tr>\n",
225
  " <tr>\n",
226
+ " <th>378</th>\n",
227
  " <td>2023-10-27T00:00:00.000Z</td>\n",
228
  " <td>TSLA</td>\n",
229
+ " <td>0.068181</td>\n",
230
  " </tr>\n",
231
  " </tbody>\n",
232
  "</table>\n",
233
+ "<p>379 rows × 3 columns</p>\n",
234
  "</div>"
235
  ],
236
  "text/plain": [
237
  " date ticker sentiment\n",
238
+ "0 2022-12-14T00:00:00.000Z TSLA 0.091856\n",
239
+ "1 2023-02-21T00:00:00.000Z TSLA 0.080574\n",
240
+ "2 2023-08-17T00:00:00.000Z TSLA 0.214102\n",
241
+ "3 2022-09-16T00:00:00.000Z TSLA 0.114323\n",
242
+ "4 2023-08-28T00:00:00.000Z TSLA 0.214102\n",
243
  ".. ... ... ...\n",
244
+ "374 2023-02-10T00:00:00.000Z TSLA 0.080574\n",
245
+ "375 2023-05-08T00:00:00.000Z TSLA 0.011806\n",
246
+ "376 2022-09-08T00:00:00.000Z TSLA 0.114323\n",
247
+ "377 2023-07-06T00:00:00.000Z TSLA 0.150893\n",
248
+ "378 2023-10-27T00:00:00.000Z TSLA 0.068181\n",
249
  "\n",
250
+ "[379 rows x 3 columns]"
251
  ]
252
  },
253
+ "execution_count": 43,
254
  "metadata": {},
255
  "output_type": "execute_result"
256
  }
 
262
  },
263
  {
264
  "cell_type": "code",
265
+ "execution_count": 44,
266
  "metadata": {},
267
  "outputs": [],
268
  "source": [
 
275
  },
276
  {
277
  "cell_type": "code",
278
+ "execution_count": 45,
279
  "metadata": {},
280
  "outputs": [
281
  {
 
309
  " <th>0</th>\n",
310
  " <td>2022-12-14</td>\n",
311
  " <td>TSLA</td>\n",
312
+ " <td>0.091856</td>\n",
313
  " </tr>\n",
314
  " <tr>\n",
315
  " <th>1</th>\n",
316
  " <td>2023-02-21</td>\n",
317
  " <td>TSLA</td>\n",
318
+ " <td>0.080574</td>\n",
319
  " </tr>\n",
320
  " <tr>\n",
321
  " <th>2</th>\n",
322
  " <td>2023-08-17</td>\n",
323
  " <td>TSLA</td>\n",
324
+ " <td>0.214102</td>\n",
325
  " </tr>\n",
326
  " <tr>\n",
327
  " <th>3</th>\n",
328
  " <td>2022-09-16</td>\n",
329
  " <td>TSLA</td>\n",
330
+ " <td>0.114323</td>\n",
331
  " </tr>\n",
332
  " <tr>\n",
333
  " <th>4</th>\n",
334
  " <td>2023-08-28</td>\n",
335
  " <td>TSLA</td>\n",
336
+ " <td>0.214102</td>\n",
337
  " </tr>\n",
338
  " </tbody>\n",
339
  "</table>\n",
 
341
  ],
342
  "text/plain": [
343
  " date ticker sentiment\n",
344
+ "0 2022-12-14 TSLA 0.091856\n",
345
+ "1 2023-02-21 TSLA 0.080574\n",
346
+ "2 2023-08-17 TSLA 0.214102\n",
347
+ "3 2022-09-16 TSLA 0.114323\n",
348
+ "4 2023-08-28 TSLA 0.214102"
349
  ]
350
  },
351
+ "execution_count": 45,
352
  "metadata": {},
353
  "output_type": "execute_result"
354
  }
 
359
  },
360
  {
361
  "cell_type": "code",
362
+ "execution_count": 46,
363
  "metadata": {},
364
  "outputs": [],
365
  "source": [
 
384
  },
385
  {
386
  "cell_type": "code",
387
+ "execution_count": 47,
388
  "metadata": {},
389
  "outputs": [
390
  {
 
417
  " <tr>\n",
418
  " <th>0</th>\n",
419
  " <td>2022-12-14</td>\n",
420
+ " <td>0.091856</td>\n",
421
  " <td>1.0</td>\n",
422
  " </tr>\n",
423
  " <tr>\n",
424
  " <th>1</th>\n",
425
  " <td>2023-02-21</td>\n",
426
+ " <td>0.080574</td>\n",
427
  " <td>1.0</td>\n",
428
  " </tr>\n",
429
  " <tr>\n",
430
  " <th>2</th>\n",
431
  " <td>2023-08-17</td>\n",
432
+ " <td>0.214102</td>\n",
433
  " <td>1.0</td>\n",
434
  " </tr>\n",
435
  " <tr>\n",
436
  " <th>3</th>\n",
437
  " <td>2022-09-16</td>\n",
438
+ " <td>0.114323</td>\n",
439
  " <td>1.0</td>\n",
440
  " </tr>\n",
441
  " <tr>\n",
442
  " <th>4</th>\n",
443
  " <td>2023-08-28</td>\n",
444
+ " <td>0.214102</td>\n",
445
  " <td>1.0</td>\n",
446
  " </tr>\n",
447
  " </tbody>\n",
 
450
  ],
451
  "text/plain": [
452
  " date sentiment ticker_TSLA\n",
453
+ "0 2022-12-14 0.091856 1.0\n",
454
+ "1 2023-02-21 0.080574 1.0\n",
455
+ "2 2023-08-17 0.214102 1.0\n",
456
+ "3 2022-09-16 0.114323 1.0\n",
457
+ "4 2023-08-28 0.214102 1.0"
458
  ]
459
  },
460
+ "execution_count": 47,
461
  "metadata": {},
462
  "output_type": "execute_result"
463
  }
 
469
  },
470
  {
471
  "cell_type": "code",
472
+ "execution_count": 48,
473
  "metadata": {},
474
  "outputs": [],
475
  "source": [
 
495
  },
496
  {
497
  "cell_type": "code",
498
+ "execution_count": 49,
499
  "metadata": {},
500
  "outputs": [],
501
  "source": [
 
503
  "scaler = MinMaxScaler()\n",
504
  "\n",
505
  "# Fitting and transforming the 'open' column\n",
506
+ "#y_train['open_scaled'] = scaler.fit_transform(y_train[['open']])\n",
507
+ "#y_train.drop('open', axis=1, inplace=True)"
508
  ]
509
  },
510
  {
511
  "cell_type": "code",
512
+ "execution_count": 50,
513
  "metadata": {},
514
  "outputs": [],
515
  "source": [
516
  "#Doing the same to y_test as done to y_train \n",
517
+ "#y_test['open_scaled'] = scaler.fit_transform(y_test[['open']])\n",
518
+ "#y_test.drop('open', axis=1, inplace=True)"
519
  ]
520
  },
521
  {
522
  "cell_type": "code",
523
+ "execution_count": 51,
524
  "metadata": {},
525
  "outputs": [],
526
  "source": [
 
567
  },
568
  {
569
  "cell_type": "code",
570
+ "execution_count": 52,
571
  "metadata": {},
572
  "outputs": [
573
  {
574
  "name": "stdout",
575
  "output_type": "stream",
576
  "text": [
577
+ "2024-05-14 12:27:09,948 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
578
  "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
579
  "\n"
580
  ]
 
598
  },
599
  {
600
  "cell_type": "code",
601
+ "execution_count": 53,
602
  "metadata": {},
603
  "outputs": [],
604
  "source": [
 
617
  },
618
  {
619
  "cell_type": "code",
620
+ "execution_count": 54,
621
  "metadata": {},
622
  "outputs": [
623
  {
624
  "name": "stdout",
625
  "output_type": "stream",
626
  "text": [
627
+ "\u001b[1m12/12\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 6ms/step - loss: 120898.4766\n"
628
  ]
629
  },
630
  {
631
  "data": {
632
  "text/plain": [
633
+ "<keras.src.callbacks.history.History at 0x2858e189fd0>"
634
  ]
635
  },
636
+ "execution_count": 54,
637
  "metadata": {},
638
  "output_type": "execute_result"
639
  }
 
645
  },
646
  {
647
  "cell_type": "code",
648
+ "execution_count": 55,
649
  "metadata": {},
650
  "outputs": [
651
  {
652
  "name": "stdout",
653
  "output_type": "stream",
654
  "text": [
655
+ "2024-05-14 12:27:25,395 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
656
  "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
657
  "\n"
658
  ]
 
676
  },
677
  {
678
  "cell_type": "code",
679
+ "execution_count": 56,
680
  "metadata": {},
681
  "outputs": [
682
  {
683
  "name": "stdout",
684
  "output_type": "stream",
685
  "text": [
686
+ "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 274ms/step\n",
687
+ "Root Mean Squared Error (RMSE): 187.9722523761173\n"
688
  ]
689
  }
690
  ],
 
703
  },
704
  {
705
  "cell_type": "code",
706
+ "execution_count": 57,
707
  "metadata": {},
708
  "outputs": [
709
  {
 
721
  },
722
  {
723
  "cell_type": "code",
724
+ "execution_count": 58,
725
+ "metadata": {},
726
+ "outputs": [
727
+ {
728
+ "name": "stdout",
729
+ "output_type": "stream",
730
+ "text": [
731
+ "<class 'pandas.core.frame.DataFrame'>\n",
732
+ "RangeIndex: 379 entries, 0 to 378\n",
733
+ "Data columns (total 1 columns):\n",
734
+ " # Column Non-Null Count Dtype \n",
735
+ "--- ------ -------------- ----- \n",
736
+ " 0 open 379 non-null float64\n",
737
+ "dtypes: float64(1)\n",
738
+ "memory usage: 3.1 KB\n"
739
+ ]
740
+ }
741
+ ],
742
+ "source": [
743
+ "y_train.info()"
744
+ ]
745
+ },
746
+ {
747
+ "cell_type": "code",
748
+ "execution_count": 59,
749
+ "metadata": {},
750
+ "outputs": [
751
+ {
752
+ "data": {
753
+ "text/plain": [
754
+ "array([[0.8625605 ],\n",
755
+ " [0.8625586 ],\n",
756
+ " [0.8625606 ],\n",
757
+ " [0.86255974],\n",
758
+ " [0.8625601 ],\n",
759
+ " [0.862559 ],\n",
760
+ " [0.8625611 ],\n",
761
+ " [0.8625565 ],\n",
762
+ " [0.86256105],\n",
763
+ " [0.86256105],\n",
764
+ " [0.8625609 ],\n",
765
+ " [0.86255926],\n",
766
+ " [0.8625612 ],\n",
767
+ " [0.86256075],\n",
768
+ " [0.8625595 ],\n",
769
+ " [0.8625567 ],\n",
770
+ " [0.8625613 ],\n",
771
+ " [0.8625613 ],\n",
772
+ " [0.8625598 ],\n",
773
+ " [0.86255765],\n",
774
+ " [0.86256105],\n",
775
+ " [0.86256033],\n",
776
+ " [0.86256117],\n",
777
+ " [0.86256 ],\n",
778
+ " [0.86256105],\n",
779
+ " [0.86255074],\n",
780
+ " [0.86256117],\n",
781
+ " [0.86256117],\n",
782
+ " [0.8625596 ],\n",
783
+ " [0.86256075],\n",
784
+ " [0.8625611 ],\n",
785
+ " [0.8625562 ],\n",
786
+ " [0.8625593 ],\n",
787
+ " [0.8625606 ],\n",
788
+ " [0.86255825],\n",
789
+ " [0.8625602 ],\n",
790
+ " [0.86256117],\n",
791
+ " [0.86255556],\n",
792
+ " [0.8625559 ],\n",
793
+ " [0.8625609 ],\n",
794
+ " [0.86256045],\n",
795
+ " [0.8625612 ],\n",
796
+ " [0.86256117],\n",
797
+ " [0.86255604],\n",
798
+ " [0.86255944],\n",
799
+ " [0.8625578 ],\n",
800
+ " [0.8625609 ],\n",
801
+ " [0.8625601 ],\n",
802
+ " [0.86256063],\n",
803
+ " [0.86255896],\n",
804
+ " [0.86256105],\n",
805
+ " [0.86256075],\n",
806
+ " [0.8625613 ],\n",
807
+ " [0.8625543 ],\n",
808
+ " [0.86255914],\n",
809
+ " [0.86256075],\n",
810
+ " [0.8625611 ],\n",
811
+ " [0.8625604 ],\n",
812
+ " [0.86256 ],\n",
813
+ " [0.8625611 ],\n",
814
+ " [0.86256063],\n",
815
+ " [0.8625612 ],\n",
816
+ " [0.8625572 ],\n",
817
+ " [0.8625559 ],\n",
818
+ " [0.8625613 ],\n",
819
+ " [0.8625582 ],\n",
820
+ " [0.8625613 ],\n",
821
+ " [0.862561 ],\n",
822
+ " [0.8625606 ],\n",
823
+ " [0.8625579 ],\n",
824
+ " [0.86256105],\n",
825
+ " [0.8625583 ],\n",
826
+ " [0.86255735],\n",
827
+ " [0.86255866],\n",
828
+ " [0.8625613 ],\n",
829
+ " [0.8625612 ],\n",
830
+ " [0.8625552 ],\n",
831
+ " [0.8625532 ],\n",
832
+ " [0.8625613 ],\n",
833
+ " [0.8625609 ],\n",
834
+ " [0.86256045],\n",
835
+ " [0.8625612 ],\n",
836
+ " [0.86255205],\n",
837
+ " [0.8625613 ],\n",
838
+ " [0.8625613 ],\n",
839
+ " [0.862559 ],\n",
840
+ " [0.86256117],\n",
841
+ " [0.8625567 ]], dtype=float32)"
842
+ ]
843
+ },
844
+ "execution_count": 59,
845
+ "metadata": {},
846
+ "output_type": "execute_result"
847
+ }
848
+ ],
849
+ "source": [
850
+ "y_pred"
851
+ ]
852
+ },
853
+ {
854
+ "cell_type": "code",
855
+ "execution_count": 60,
856
  "metadata": {},
857
  "outputs": [
858
  {
859
  "data": {
860
  "text/plain": [
861
+ "{'RMSE': 187.9722523761173}"
862
  ]
863
  },
864
+ "execution_count": 60,
865
  "metadata": {},
866
  "output_type": "execute_result"
867
  }
 
875
  },
876
  {
877
  "cell_type": "code",
878
+ "execution_count": 61,
879
  "metadata": {},
880
  "outputs": [],
881
  "source": [
 
887
  },
888
  {
889
  "cell_type": "code",
890
+ "execution_count": 62,
891
  "metadata": {},
892
  "outputs": [],
893
  "source": [
 
899
  },
900
  {
901
  "cell_type": "code",
902
+ "execution_count": 63,
903
  "metadata": {},
904
  "outputs": [],
905
  "source": [
 
916
  },
917
  {
918
  "cell_type": "code",
919
+ "execution_count": 64,
920
  "metadata": {},
921
  "outputs": [
922
  {
 
929
  {
930
  "data": {
931
  "application/vnd.jupyter.widget-view+json": {
932
+ "model_id": "fa33c5ea489f4cde9b6c4d3a4012d3a5",
933
  "version_major": 2,
934
  "version_minor": 0
935
  },
 
943
  {
944
  "data": {
945
  "application/vnd.jupyter.widget-view+json": {
946
+ "model_id": "f62535a0b7db42bfa005baa53180cbf8",
947
  "version_major": 2,
948
  "version_minor": 0
949
  },
950
  "text/plain": [
951
+ "Uploading: 0.000%| | 0/291263 elapsed<00:00 remaining<?"
952
  ]
953
  },
954
  "metadata": {},
 
958
  "name": "stdout",
959
  "output_type": "stream",
960
  "text": [
961
+ "2024-05-14 12:27:43,638 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
962
  "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
963
  "\n"
964
  ]
 
966
  {
967
  "data": {
968
  "application/vnd.jupyter.widget-view+json": {
969
+ "model_id": "8128a25f8fd74b7f9d11d6518437f5ed",
970
  "version_major": 2,
971
  "version_minor": 0
972
  },
973
  "text/plain": [
974
+ "Uploading: 0.000%| | 0/44 elapsed<00:00 remaining<?"
975
  ]
976
  },
977
  "metadata": {},
 
980
  {
981
  "data": {
982
  "application/vnd.jupyter.widget-view+json": {
983
+ "model_id": "67f653372fdf455a98dd084cfdb0482a",
984
  "version_major": 2,
985
  "version_minor": 0
986
  },
987
  "text/plain": [
988
+ "Uploading: 0.000%| | 0/554 elapsed<00:00 remaining<?"
989
  ]
990
  },
991
  "metadata": {},
 
995
  "name": "stdout",
996
  "output_type": "stream",
997
  "text": [
998
+ "Model created, explore it at https://c.app.hopsworks.ai:443/p/693399/models/stock_pred_model/28\n"
999
  ]
1000
  }
1001
  ],
Stocks news prediction/Notebooks/8_inference_pipeline.ipynb CHANGED
@@ -2,114 +2,16 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
- {
9
- "name": "stdout",
10
- "output_type": "stream",
11
- "text": [
12
- "Connected. Call `.close()` to terminate connection gracefully.\n",
13
- "\n",
14
- "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n",
15
- "Connected. Call `.close()` to terminate connection gracefully.\n",
16
- "Connected. Call `.close()` to terminate connection gracefully.\n",
17
- "Training dataset job started successfully, you can follow the progress at \n",
18
- "https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stocks_fv_1_create_fv_td_10052024081806/executions\n",
19
- "2024-05-10 10:19:22,487 WARNING: VersionWarning: Incremented version to `16`.\n",
20
- "\n",
21
- "2024-05-10 10:19:23,632 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
22
- "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
23
- "\n",
24
- "\u001b[1m12/12\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 5ms/step - loss: 0.5137 \n",
25
- "2024-05-10 10:19:29,747 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
26
- "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
27
- "\n",
28
- "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 247ms/step\n",
29
- "Root Mean Squared Error (RMSE): 0.3917957758452665\n",
30
- "Connected. Call `.close()` to terminate connection gracefully.\n",
31
- "Connected. Call `.close()` to terminate connection gracefully.\n"
32
- ]
33
- },
34
- {
35
- "data": {
36
- "application/vnd.jupyter.widget-view+json": {
37
- "model_id": "b966a8a466c64b34ababb947368a545f",
38
- "version_major": 2,
39
- "version_minor": 0
40
- },
41
- "text/plain": [
42
- " 0%| | 0/6 [00:00<?, ?it/s]"
43
- ]
44
- },
45
- "metadata": {},
46
- "output_type": "display_data"
47
- },
48
- {
49
- "data": {
50
- "application/vnd.jupyter.widget-view+json": {
51
- "model_id": "018a87899aa141ae829f4dadb3974809",
52
- "version_major": 2,
53
- "version_minor": 0
54
- },
55
- "text/plain": [
56
- "Uploading: 0.000%| | 0/291253 elapsed<00:00 remaining<?"
57
- ]
58
- },
59
- "metadata": {},
60
- "output_type": "display_data"
61
- },
62
- {
63
- "name": "stdout",
64
- "output_type": "stream",
65
- "text": [
66
- "2024-05-10 10:19:38,395 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
67
- "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
68
- "\n"
69
- ]
70
- },
71
- {
72
- "data": {
73
- "application/vnd.jupyter.widget-view+json": {
74
- "model_id": "e1e097961cae41f0acae295f29a10a95",
75
- "version_major": 2,
76
- "version_minor": 0
77
- },
78
- "text/plain": [
79
- "Uploading: 0.000%| | 0/45 elapsed<00:00 remaining<?"
80
- ]
81
- },
82
- "metadata": {},
83
- "output_type": "display_data"
84
- },
85
- {
86
- "data": {
87
- "application/vnd.jupyter.widget-view+json": {
88
- "model_id": "c40f6b1ab70e435aaf151a1c901e5eb1",
89
- "version_major": 2,
90
- "version_minor": 0
91
- },
92
- "text/plain": [
93
- "Uploading: 0.000%| | 0/561 elapsed<00:00 remaining<?"
94
- ]
95
- },
96
- "metadata": {},
97
- "output_type": "display_data"
98
- },
99
- {
100
- "name": "stdout",
101
- "output_type": "stream",
102
- "text": [
103
- "Model created, explore it at https://c.app.hopsworks.ai:443/p/693399/models/stock_pred_model/18\n"
104
- ]
105
- },
106
  {
107
  "data": {
108
  "text/plain": [
109
  "True"
110
  ]
111
  },
112
- "execution_count": 1,
113
  "metadata": {},
114
  "output_type": "execute_result"
115
  }
@@ -132,7 +34,7 @@
132
  },
133
  {
134
  "cell_type": "code",
135
- "execution_count": 2,
136
  "metadata": {},
137
  "outputs": [
138
  {
@@ -157,53 +59,53 @@
157
  },
158
  {
159
  "cell_type": "code",
160
- "execution_count": 3,
161
  "metadata": {},
162
  "outputs": [
163
  {
164
  "name": "stdout",
165
  "output_type": "stream",
166
  "text": [
167
- "2024-05-07\n"
168
  ]
169
  }
170
  ],
171
  "source": [
172
- "start_date = datetime.now() - timedelta(hours=72)\n",
173
  "print(start_date.strftime(\"%Y-%m-%d\"))"
174
  ]
175
  },
176
  {
177
  "cell_type": "code",
178
- "execution_count": 4,
179
  "metadata": {},
180
  "outputs": [
181
  {
182
  "name": "stdout",
183
  "output_type": "stream",
184
  "text": [
185
- "2024-05-08\n"
186
  ]
187
  }
188
  ],
189
  "source": [
190
- "end_date = datetime.now() - timedelta(hours=48)\n",
191
  "print(end_date.strftime(\"%Y-%m-%d\"))"
192
  ]
193
  },
194
  {
195
  "cell_type": "code",
196
- "execution_count": 5,
197
  "metadata": {},
198
  "outputs": [],
199
  "source": [
200
- "feature_view = fs.get_feature_view('tesla_stocks_fv', 3)\n",
201
- "feature_view.init_batch_scoring(training_dataset_version=2)"
202
  ]
203
  },
204
  {
205
  "cell_type": "code",
206
- "execution_count": 6,
207
  "metadata": {},
208
  "outputs": [
209
  {
@@ -212,8 +114,8 @@
212
  "text": [
213
  "WITH right_fg0 AS (SELECT *\n",
214
  "FROM (SELECT `fg1`.`date` `date`, `fg1`.`ticker` `ticker`, `fg1`.`ticker` `join_pk_ticker`, `fg1`.`date` `join_evt_date`, `fg0`.`sentiment` `sentiment`, RANK() OVER (PARTITION BY `fg1`.`ticker`, `fg1`.`date` ORDER BY `fg0`.`date` DESC) pit_rank_hopsworks\n",
215
- "FROM `klittefr_featurestore`.`tesla_stock_3` `fg1`\n",
216
- "INNER JOIN `klittefr_featurestore`.`news_sentiment_updated_3` `fg0` ON `fg1`.`ticker` = `fg0`.`ticker` AND `fg1`.`date` >= `fg0`.`date`) NA\n",
217
  "WHERE `pit_rank_hopsworks` = 1) (SELECT `right_fg0`.`date` `date`, `right_fg0`.`ticker` `ticker`, `right_fg0`.`sentiment` `sentiment`\n",
218
  "FROM right_fg0)\n"
219
  ]
@@ -225,7 +127,7 @@
225
  },
226
  {
227
  "cell_type": "code",
228
- "execution_count": 7,
229
  "metadata": {},
230
  "outputs": [
231
  {
@@ -243,29 +145,7 @@
243
  },
244
  {
245
  "cell_type": "code",
246
- "execution_count": 8,
247
- "metadata": {},
248
- "outputs": [
249
- {
250
- "data": {
251
- "text/plain": [
252
- "0 0.010694\n",
253
- "Name: sentiment, dtype: float64"
254
- ]
255
- },
256
- "execution_count": 8,
257
- "metadata": {},
258
- "output_type": "execute_result"
259
- }
260
- ],
261
- "source": [
262
- "features_df = tesla_df_b.iloc[:,2]\n",
263
- "features_df"
264
- ]
265
- },
266
- {
267
- "cell_type": "code",
268
- "execution_count": 9,
269
  "metadata": {},
270
  "outputs": [
271
  {
@@ -297,9 +177,9 @@
297
  " <tbody>\n",
298
  " <tr>\n",
299
  " <th>0</th>\n",
300
- " <td>2024-05-08 00:00:00+00:00</td>\n",
301
  " <td>TSLA</td>\n",
302
- " <td>0.010694</td>\n",
303
  " </tr>\n",
304
  " </tbody>\n",
305
  "</table>\n",
@@ -307,10 +187,10 @@
307
  ],
308
  "text/plain": [
309
  " date ticker sentiment\n",
310
- "0 2024-05-08 00:00:00+00:00 TSLA 0.010694"
311
  ]
312
  },
313
- "execution_count": 9,
314
  "metadata": {},
315
  "output_type": "execute_result"
316
  }
@@ -321,7 +201,7 @@
321
  },
322
  {
323
  "cell_type": "code",
324
- "execution_count": 10,
325
  "metadata": {},
326
  "outputs": [],
327
  "source": [
@@ -330,7 +210,7 @@
330
  },
331
  {
332
  "cell_type": "code",
333
- "execution_count": 11,
334
  "metadata": {},
335
  "outputs": [],
336
  "source": [
@@ -356,14 +236,14 @@
356
  },
357
  {
358
  "cell_type": "code",
359
- "execution_count": 12,
360
  "metadata": {},
361
  "outputs": [
362
  {
363
  "name": "stdout",
364
  "output_type": "stream",
365
  "text": [
366
- "2024-05-10 10:20:03,207 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
367
  "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
368
  "\n"
369
  ]
@@ -387,7 +267,7 @@
387
  },
388
  {
389
  "cell_type": "code",
390
- "execution_count": 13,
391
  "metadata": {},
392
  "outputs": [
393
  {
@@ -401,7 +281,7 @@
401
  "source": [
402
  "import joblib\n",
403
  "\n",
404
- "the_model = mr.get_model(\"stock_pred_model\", version=9)\n",
405
  "model_dir = the_model.download()\n",
406
  "\n",
407
  "model = joblib.load(model_dir + \"/stock_prediction_model.pkl\")"
@@ -409,14 +289,14 @@
409
  },
410
  {
411
  "cell_type": "code",
412
- "execution_count": 14,
413
  "metadata": {},
414
  "outputs": [
415
  {
416
  "name": "stdout",
417
  "output_type": "stream",
418
  "text": [
419
- "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 582ms/step\n"
420
  ]
421
  }
422
  ],
@@ -426,16 +306,16 @@
426
  },
427
  {
428
  "cell_type": "code",
429
- "execution_count": 15,
430
  "metadata": {},
431
  "outputs": [
432
  {
433
  "data": {
434
  "text/plain": [
435
- "array([[0.18933737]], dtype=float32)"
436
  ]
437
  },
438
- "execution_count": 15,
439
  "metadata": {},
440
  "output_type": "execute_result"
441
  }
@@ -446,7 +326,31 @@
446
  },
447
  {
448
  "cell_type": "code",
449
- "execution_count": 16,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  "metadata": {},
451
  "outputs": [],
452
  "source": [
@@ -455,7 +359,7 @@
455
  },
456
  {
457
  "cell_type": "code",
458
- "execution_count": 17,
459
  "metadata": {},
460
  "outputs": [],
461
  "source": [
@@ -468,7 +372,7 @@
468
  },
469
  {
470
  "cell_type": "code",
471
- "execution_count": 18,
472
  "metadata": {},
473
  "outputs": [],
474
  "source": [
@@ -477,7 +381,7 @@
477
  },
478
  {
479
  "cell_type": "code",
480
- "execution_count": 19,
481
  "metadata": {},
482
  "outputs": [
483
  {
@@ -510,21 +414,21 @@
510
  " <tbody>\n",
511
  " <tr>\n",
512
  " <th>0</th>\n",
513
- " <td>0.010694</td>\n",
514
  " <td>1.0</td>\n",
515
- " <td>[0.1893373727798462]</td>\n",
516
- " <td>2024-05-08</td>\n",
517
  " </tr>\n",
518
  " </tbody>\n",
519
  "</table>\n",
520
  "</div>"
521
  ],
522
  "text/plain": [
523
- " sentiment ticker_TSLA predictions date\n",
524
- "0 0.010694 1.0 [0.1893373727798462] 2024-05-08"
525
  ]
526
  },
527
- "execution_count": 19,
528
  "metadata": {},
529
  "output_type": "execute_result"
530
  }
@@ -535,7 +439,7 @@
535
  },
536
  {
537
  "cell_type": "code",
538
- "execution_count": 27,
539
  "metadata": {},
540
  "outputs": [],
541
  "source": [
@@ -554,7 +458,7 @@
554
  },
555
  {
556
  "cell_type": "code",
557
- "execution_count": 29,
558
  "metadata": {},
559
  "outputs": [
560
  {
@@ -587,9 +491,9 @@
587
  " <tbody>\n",
588
  " <tr>\n",
589
  " <th>0</th>\n",
590
- " <td>0.010694</td>\n",
591
- " <td>[0.1893373727798462]</td>\n",
592
- " <td>2024-05-08</td>\n",
593
  " <td>TSLA</td>\n",
594
  " </tr>\n",
595
  " </tbody>\n",
@@ -597,11 +501,11 @@
597
  "</div>"
598
  ],
599
  "text/plain": [
600
- " sentiment predictions date ticker\n",
601
- "0 0.010694 [0.1893373727798462] 2024-05-08 TSLA"
602
  ]
603
  },
604
- "execution_count": 29,
605
  "metadata": {},
606
  "output_type": "execute_result"
607
  }
@@ -612,7 +516,31 @@
612
  },
613
  {
614
  "cell_type": "code",
615
- "execution_count": 31,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
  "metadata": {},
617
  "outputs": [
618
  {
@@ -635,14 +563,14 @@
635
  },
636
  {
637
  "cell_type": "code",
638
- "execution_count": 38,
639
  "metadata": {},
640
  "outputs": [
641
  {
642
  "name": "stdout",
643
  "output_type": "stream",
644
  "text": [
645
- "2024-05-10 10:35:57,610 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n",
646
  "\n"
647
  ]
648
  }
@@ -650,7 +578,7 @@
650
  "source": [
651
  "results_fg = fs.get_or_create_feature_group(\n",
652
  " name= 'stock_prediction_results',\n",
653
- " version = 1,\n",
654
  " description = 'Predction of TSLA open stock price',\n",
655
  " primary_key = ['ticker'],\n",
656
  " event_time = ['date'],\n",
@@ -660,7 +588,7 @@
660
  },
661
  {
662
  "cell_type": "code",
663
- "execution_count": 39,
664
  "metadata": {},
665
  "outputs": [
666
  {
@@ -668,13 +596,13 @@
668
  "output_type": "stream",
669
  "text": [
670
  "Feature Group created successfully, explore it at \n",
671
- "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/798071\n"
672
  ]
673
  },
674
  {
675
  "data": {
676
  "application/vnd.jupyter.widget-view+json": {
677
- "model_id": "cfcfecba9ed34fb591c32bf73d87ee1c",
678
  "version_major": 2,
679
  "version_minor": 0
680
  },
@@ -689,18 +617,18 @@
689
  "name": "stdout",
690
  "output_type": "stream",
691
  "text": [
692
- "Launching job: stock_prediction_results_1_offline_fg_materialization\n",
693
  "Job started successfully, you can follow the progress at \n",
694
- "https://c.app.hopsworks.ai/p/693399/jobs/named/stock_prediction_results_1_offline_fg_materialization/executions\n"
695
  ]
696
  },
697
  {
698
  "data": {
699
  "text/plain": [
700
- "(<hsfs.core.job.Job at 0x21cf75e5f10>, None)"
701
  ]
702
  },
703
- "execution_count": 39,
704
  "metadata": {},
705
  "output_type": "execute_result"
706
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 40,
6
  "metadata": {},
7
  "outputs": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  {
9
  "data": {
10
  "text/plain": [
11
  "True"
12
  ]
13
  },
14
+ "execution_count": 40,
15
  "metadata": {},
16
  "output_type": "execute_result"
17
  }
 
34
  },
35
  {
36
  "cell_type": "code",
37
+ "execution_count": 41,
38
  "metadata": {},
39
  "outputs": [
40
  {
 
59
  },
60
  {
61
  "cell_type": "code",
62
+ "execution_count": 42,
63
  "metadata": {},
64
  "outputs": [
65
  {
66
  "name": "stdout",
67
  "output_type": "stream",
68
  "text": [
69
+ "2024-05-12\n"
70
  ]
71
  }
72
  ],
73
  "source": [
74
+ "start_date = datetime.now() - timedelta(hours=48)\n",
75
  "print(start_date.strftime(\"%Y-%m-%d\"))"
76
  ]
77
  },
78
  {
79
  "cell_type": "code",
80
+ "execution_count": 43,
81
  "metadata": {},
82
  "outputs": [
83
  {
84
  "name": "stdout",
85
  "output_type": "stream",
86
  "text": [
87
+ "2024-05-13\n"
88
  ]
89
  }
90
  ],
91
  "source": [
92
+ "end_date = datetime.now() - timedelta(hours=24)\n",
93
  "print(end_date.strftime(\"%Y-%m-%d\"))"
94
  ]
95
  },
96
  {
97
  "cell_type": "code",
98
+ "execution_count": 44,
99
  "metadata": {},
100
  "outputs": [],
101
  "source": [
102
+ "feature_view = fs.get_feature_view('tesla_stocks_fv', 5)\n",
103
+ "feature_view.init_batch_scoring(training_dataset_version=1)"
104
  ]
105
  },
106
  {
107
  "cell_type": "code",
108
+ "execution_count": 45,
109
  "metadata": {},
110
  "outputs": [
111
  {
 
114
  "text": [
115
  "WITH right_fg0 AS (SELECT *\n",
116
  "FROM (SELECT `fg1`.`date` `date`, `fg1`.`ticker` `ticker`, `fg1`.`ticker` `join_pk_ticker`, `fg1`.`date` `join_evt_date`, `fg0`.`sentiment` `sentiment`, RANK() OVER (PARTITION BY `fg1`.`ticker`, `fg1`.`date` ORDER BY `fg0`.`date` DESC) pit_rank_hopsworks\n",
117
+ "FROM `klittefr_featurestore`.`tesla_stock_5` `fg1`\n",
118
+ "INNER JOIN `klittefr_featurestore`.`news_sentiment_updated_5` `fg0` ON `fg1`.`ticker` = `fg0`.`ticker` AND `fg1`.`date` >= `fg0`.`date`) NA\n",
119
  "WHERE `pit_rank_hopsworks` = 1) (SELECT `right_fg0`.`date` `date`, `right_fg0`.`ticker` `ticker`, `right_fg0`.`sentiment` `sentiment`\n",
120
  "FROM right_fg0)\n"
121
  ]
 
127
  },
128
  {
129
  "cell_type": "code",
130
+ "execution_count": 46,
131
  "metadata": {},
132
  "outputs": [
133
  {
 
145
  },
146
  {
147
  "cell_type": "code",
148
+ "execution_count": 47,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  "metadata": {},
150
  "outputs": [
151
  {
 
177
  " <tbody>\n",
178
  " <tr>\n",
179
  " <th>0</th>\n",
180
+ " <td>2024-05-13 00:00:00+00:00</td>\n",
181
  " <td>TSLA</td>\n",
182
+ " <td>0.115443</td>\n",
183
  " </tr>\n",
184
  " </tbody>\n",
185
  "</table>\n",
 
187
  ],
188
  "text/plain": [
189
  " date ticker sentiment\n",
190
+ "0 2024-05-13 00:00:00+00:00 TSLA 0.115443"
191
  ]
192
  },
193
+ "execution_count": 47,
194
  "metadata": {},
195
  "output_type": "execute_result"
196
  }
 
201
  },
202
  {
203
  "cell_type": "code",
204
+ "execution_count": 48,
205
  "metadata": {},
206
  "outputs": [],
207
  "source": [
 
210
  },
211
  {
212
  "cell_type": "code",
213
+ "execution_count": 49,
214
  "metadata": {},
215
  "outputs": [],
216
  "source": [
 
236
  },
237
  {
238
  "cell_type": "code",
239
+ "execution_count": 50,
240
  "metadata": {},
241
  "outputs": [
242
  {
243
  "name": "stdout",
244
  "output_type": "stream",
245
  "text": [
246
+ "2024-05-14 12:30:49,197 WARNING: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
247
  "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
248
  "\n"
249
  ]
 
267
  },
268
  {
269
  "cell_type": "code",
270
+ "execution_count": 51,
271
  "metadata": {},
272
  "outputs": [
273
  {
 
281
  "source": [
282
  "import joblib\n",
283
  "\n",
284
+ "the_model = mr.get_model(\"stock_pred_model\", version=28)\n",
285
  "model_dir = the_model.download()\n",
286
  "\n",
287
  "model = joblib.load(model_dir + \"/stock_prediction_model.pkl\")"
 
289
  },
290
  {
291
  "cell_type": "code",
292
+ "execution_count": 52,
293
  "metadata": {},
294
  "outputs": [
295
  {
296
  "name": "stdout",
297
  "output_type": "stream",
298
  "text": [
299
+ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1s/step\n"
300
  ]
301
  }
302
  ],
 
306
  },
307
  {
308
  "cell_type": "code",
309
+ "execution_count": 53,
310
  "metadata": {},
311
  "outputs": [
312
  {
313
  "data": {
314
  "text/plain": [
315
+ "array([[0.8625609]], dtype=float32)"
316
  ]
317
  },
318
+ "execution_count": 53,
319
  "metadata": {},
320
  "output_type": "execute_result"
321
  }
 
326
  },
327
  {
328
  "cell_type": "code",
329
+ "execution_count": 54,
330
+ "metadata": {},
331
+ "outputs": [
332
+ {
333
+ "name": "stdout",
334
+ "output_type": "stream",
335
+ "text": [
336
+ "86.25609278678894\n"
337
+ ]
338
+ }
339
+ ],
340
+ "source": [
341
+ "import numpy as np\n",
342
+ "\n",
343
+ "# Our predictions array\n",
344
+ "predictions = np.array(predictions, dtype=np.float32)\n",
345
+ "\n",
346
+ "# Changing the format of the predicted value to correspond with format of \"open\"\n",
347
+ "predictions = predictions[0][0]*100\n",
348
+ "print(predictions)\n"
349
+ ]
350
+ },
351
+ {
352
+ "cell_type": "code",
353
+ "execution_count": 55,
354
  "metadata": {},
355
  "outputs": [],
356
  "source": [
 
359
  },
360
  {
361
  "cell_type": "code",
362
+ "execution_count": 56,
363
  "metadata": {},
364
  "outputs": [],
365
  "source": [
 
372
  },
373
  {
374
  "cell_type": "code",
375
+ "execution_count": 57,
376
  "metadata": {},
377
  "outputs": [],
378
  "source": [
 
381
  },
382
  {
383
  "cell_type": "code",
384
+ "execution_count": 58,
385
  "metadata": {},
386
  "outputs": [
387
  {
 
414
  " <tbody>\n",
415
  " <tr>\n",
416
  " <th>0</th>\n",
417
+ " <td>0.115443</td>\n",
418
  " <td>1.0</td>\n",
419
+ " <td>86.256093</td>\n",
420
+ " <td>2024-05-13</td>\n",
421
  " </tr>\n",
422
  " </tbody>\n",
423
  "</table>\n",
424
  "</div>"
425
  ],
426
  "text/plain": [
427
+ " sentiment ticker_TSLA predictions date\n",
428
+ "0 0.115443 1.0 86.256093 2024-05-13"
429
  ]
430
  },
431
+ "execution_count": 58,
432
  "metadata": {},
433
  "output_type": "execute_result"
434
  }
 
439
  },
440
  {
441
  "cell_type": "code",
442
+ "execution_count": 59,
443
  "metadata": {},
444
  "outputs": [],
445
  "source": [
 
458
  },
459
  {
460
  "cell_type": "code",
461
+ "execution_count": 60,
462
  "metadata": {},
463
  "outputs": [
464
  {
 
491
  " <tbody>\n",
492
  " <tr>\n",
493
  " <th>0</th>\n",
494
+ " <td>0.115443</td>\n",
495
+ " <td>86.256093</td>\n",
496
+ " <td>2024-05-13</td>\n",
497
  " <td>TSLA</td>\n",
498
  " </tr>\n",
499
  " </tbody>\n",
 
501
  "</div>"
502
  ],
503
  "text/plain": [
504
+ " sentiment predictions date ticker\n",
505
+ "0 0.115443 86.256093 2024-05-13 TSLA"
506
  ]
507
  },
508
+ "execution_count": 60,
509
  "metadata": {},
510
  "output_type": "execute_result"
511
  }
 
516
  },
517
  {
518
  "cell_type": "code",
519
+ "execution_count": 61,
520
+ "metadata": {},
521
+ "outputs": [],
522
+ "source": [
523
+ "#from sklearn.preprocessing import MinMaxScaler\n",
524
+ "\n",
525
+ "# Flatten the list of lists into a single list\n",
526
+ "#flat_predictions_scaled = [item for sublist in predictions_scaled for item in sublist]\n",
527
+ "\n",
528
+ "# Initialize the MinMaxScaler\n",
529
+ "#scaler = MinMaxScaler()\n",
530
+ "\n",
531
+ "# Fit the scaler to the scaled predictions\n",
532
+ "#scaler.fit(flat_predictions_scaled)\n",
533
+ "\n",
534
+ "# Inverse transform the scaled predictions to get the original values\n",
535
+ "#predictions_unscaled = scaler.inverse_transform(flat_predictions_scaled)\n",
536
+ "\n",
537
+ "# Update the 'predictions' column with the unscaled values\n",
538
+ "#tesla_df_b['predictions'] = predictions_unscaled"
539
+ ]
540
+ },
541
+ {
542
+ "cell_type": "code",
543
+ "execution_count": 62,
544
  "metadata": {},
545
  "outputs": [
546
  {
 
563
  },
564
  {
565
  "cell_type": "code",
566
+ "execution_count": 65,
567
  "metadata": {},
568
  "outputs": [
569
  {
570
  "name": "stdout",
571
  "output_type": "stream",
572
  "text": [
573
+ "2024-05-14 12:39:44,585 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n",
574
  "\n"
575
  ]
576
  }
 
578
  "source": [
579
  "results_fg = fs.get_or_create_feature_group(\n",
580
  " name= 'stock_prediction_results',\n",
581
+ " version = 4,\n",
582
  " description = 'Predction of TSLA open stock price',\n",
583
  " primary_key = ['ticker'],\n",
584
  " event_time = ['date'],\n",
 
588
  },
589
  {
590
  "cell_type": "code",
591
+ "execution_count": 66,
592
  "metadata": {},
593
  "outputs": [
594
  {
 
596
  "output_type": "stream",
597
  "text": [
598
  "Feature Group created successfully, explore it at \n",
599
+ "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/814414\n"
600
  ]
601
  },
602
  {
603
  "data": {
604
  "application/vnd.jupyter.widget-view+json": {
605
+ "model_id": "33665584853d402aaa2c6c8dc2386ed5",
606
  "version_major": 2,
607
  "version_minor": 0
608
  },
 
617
  "name": "stdout",
618
  "output_type": "stream",
619
  "text": [
620
+ "Launching job: stock_prediction_results_4_offline_fg_materialization\n",
621
  "Job started successfully, you can follow the progress at \n",
622
+ "https://c.app.hopsworks.ai/p/693399/jobs/named/stock_prediction_results_4_offline_fg_materialization/executions\n"
623
  ]
624
  },
625
  {
626
  "data": {
627
  "text/plain": [
628
+ "(<hsfs.core.job.Job at 0x23f71193dd0>, None)"
629
  ]
630
  },
631
+ "execution_count": 66,
632
  "metadata": {},
633
  "output_type": "execute_result"
634
  }
Stocks news prediction/Notebooks/TSLA_stock_price.csv CHANGED
@@ -1,4 +1,7 @@
1
  date,1. open,2. high,3. low,4. close,5. volume,ticker
 
 
 
2
  2024-05-08,171.59,176.06,170.15,174.72,79969488.0,TSLA
3
  2024-05-07,182.4,183.26,177.4,177.81,75045854.0,TSLA
4
  2024-05-06,183.8,187.56,182.2,184.76,84390253.0,TSLA
 
1
  date,1. open,2. high,3. low,4. close,5. volume,ticker
2
+ 2024-05-13,170.0,175.4,169.0,171.89,67018903.0,TSLA
3
+ 2024-05-10,173.05,173.0599,167.75,168.47,72627178.0,TSLA
4
+ 2024-05-09,175.01,175.62,171.37,171.97,65950292.0,TSLA
5
  2024-05-08,171.59,176.06,170.15,174.72,79969488.0,TSLA
6
  2024-05-07,182.4,183.26,177.4,177.81,75045854.0,TSLA
7
  2024-05-06,183.8,187.56,182.2,184.76,84390253.0,TSLA
Stocks news prediction/Notebooks/news_articles.csv CHANGED
@@ -1,76 +1,75 @@
1
  date,ticker,sentiment
 
 
 
 
 
2
  2024-05-08,TSLA,0.010694444444444444
3
  2024-05-07,TSLA,0.03277777777777777
4
- 2024-05-06,TSLA,0.1524924570379116
5
- 2024-05-05,TSLA,0.036190476190476197
6
- 2024-05-04,TSLA,0.06266489297739299
7
- 2024-05-03,TSLA,0.027797749388658477
8
- 2024-05-02,TSLA,0.04597474747474748
9
- 2024-04-21,TSLA,0.19239861948567305
10
- 2024-04-20,TSLA,0.015277777777777765
11
- 2024-04-19,TSLA,0.10501033866418481
12
- 2024-04-18,TSLA,0.1443942393913676
13
- 2024-04-17,TSLA,0.05556939178833589
14
- 2024-03-01,TSLA,0.0673826751951752
15
- 2024-02-29,TSLA,0.1382677639820497
16
- 2024-02-28,TSLA,0.07483175986443961
17
- 2024-02-27,TSLA,0.10612244897959182
18
- 2024-01-10,TSLA,0.15686835891381345
19
- 2024-01-09,TSLA,0.2049332611832612
20
- 2024-01-08,TSLA,0.10464797355422356
21
- 2024-01-07,TSLA,0.09936317540484207
22
- 2024-01-06,TSLA,0.4296943796943797
23
- 2023-11-20,TSLA,0.1286308621933622
24
- 2023-11-19,TSLA,0.28611111111111115
25
- 2023-11-18,TSLA,-0.007575757575757592
26
- 2023-11-17,TSLA,-0.12257936507936508
27
- 2023-11-16,TSLA,0.032539682539682535
28
- 2023-11-15,TSLA,0.2286113987682615
29
- 2023-09-30,TSLA,0.03125996810207336
30
- 2023-09-29,TSLA,0.13201757645206844
31
- 2023-09-28,TSLA,0.020467534614856047
32
- 2023-09-27,TSLA,0.1685227272727273
33
- 2023-08-10,TSLA,0.1457654999321666
34
- 2023-08-09,TSLA,0.2040932311621967
35
- 2023-08-08,TSLA,0.09198456790123458
36
- 2023-08-07,TSLA,0.024046438410074776
37
- 2023-08-06,TSLA,0.5
38
- 2023-08-05,TSLA,0.09166666666666667
39
- 2023-08-04,TSLA,0.0884226135817045
40
- 2023-06-20,TSLA,0.1742147348116736
41
- 2023-06-19,TSLA,0.05943156637601082
42
- 2023-06-18,TSLA,0.18055555555555555
43
- 2023-06-17,TSLA,0.11944444444444444
44
- 2023-06-16,TSLA,-0.0005538579067990851
45
- 2023-06-15,TSLA,0.20928030303030304
46
- 2023-04-30,TSLA,-0.006388888888888886
47
- 2023-04-29,TSLA,-0.07021604938271606
48
- 2023-04-28,TSLA,-0.035103114478114476
49
- 2023-04-27,TSLA,0.14129647667147668
50
- 2023-04-26,TSLA,0.10337159333967845
51
  2023-03-10,TSLA,0.13166666666666668
52
- 2023-03-09,TSLA,0.1600571961739241
53
- 2023-03-08,TSLA,0.11654703537744476
54
- 2023-03-07,TSLA,-0.030867153679653692
55
- 2023-01-18,TSLA,0.09678851484772537
56
- 2023-01-17,TSLA,0.10135499338624339
57
- 2023-01-16,TSLA,0.1377056277056277
58
- 2022-11-28,TSLA,0.046414530685364025
59
- 2022-11-27,TSLA,0.0
60
- 2022-11-26,TSLA,0.16136363636363638
61
- 2022-11-25,TSLA,0.1022067775974026
62
- 2022-11-24,TSLA,0.08333333333333333
63
- 2022-11-23,TSLA,0.15424622414622416
64
- 2022-10-08,TSLA,0.10722853535353534
65
- 2022-10-07,TSLA,0.156965109993722
66
- 2022-10-06,TSLA,0.11721946354299295
67
- 2022-10-05,TSLA,0.053409090909090906
68
- 2022-08-18,TSLA,0.0943373359444788
69
- 2022-08-17,TSLA,0.1733991702741703
70
- 2022-08-16,TSLA,0.01159855130249867
71
- 2022-08-15,TSLA,-0.017187499999999998
72
- 2022-06-28,TSLA,0.12629822576413485
73
- 2022-06-27,TSLA,0.1501262025012025
74
- 2022-06-26,TSLA,0.0005555555555555545
75
- 2022-06-25,TSLA,0.25
76
- 2022-06-24,TSLA,-0.08422373081463991
 
1
  date,ticker,sentiment
2
+ 2024-05-13,TSLA,0.11544328870717759
3
+ 2024-05-12,TSLA,0.037500000000000006
4
+ 2024-05-11,TSLA,0.09999999999999999
5
+ 2024-05-10,TSLA,0.069649648541694
6
+ 2024-05-09,TSLA,-0.03125
7
  2024-05-08,TSLA,0.010694444444444444
8
  2024-05-07,TSLA,0.03277777777777777
9
+ 2024-05-06,TSLA,0.1334894398530762
10
+ 2024-04-26,TSLA,0.19857909580131802
11
+ 2024-04-25,TSLA,0.0994785654160654
12
+ 2024-04-24,TSLA,-0.008469729154287984
13
+ 2024-04-23,TSLA,0.22602178458796107
14
+ 2024-03-06,TSLA,0.15092336662379766
15
+ 2024-03-05,TSLA,0.14880197389756214
16
+ 2024-03-04,TSLA,0.02845765345765346
17
+ 2024-01-15,TSLA,0.13911845730027547
18
+ 2024-01-14,TSLA,0.13266666666666668
19
+ 2024-01-13,TSLA,0.14180555555555555
20
+ 2024-01-12,TSLA,0.09484551447656285
21
+ 2024-01-11,TSLA,0.04215784215784215
22
+ 2024-01-10,TSLA,0.05081168831168831
23
+ 2023-11-25,TSLA,0.09511784511784512
24
+ 2023-11-24,TSLA,-0.08042929292929292
25
+ 2023-11-23,TSLA,0.19281726579520697
26
+ 2023-11-22,TSLA,0.13482358069490422
27
+ 2023-11-21,TSLA,0.31475243506493505
28
+ 2023-11-20,TSLA,0.11935703463203465
29
+ 2023-10-05,TSLA,0.06818091630591631
30
+ 2023-10-04,TSLA,0.10093246010525424
31
+ 2023-10-03,TSLA,0.11860375194784238
32
+ 2023-10-02,TSLA,-0.0008466045738773006
33
+ 2023-08-15,TSLA,0.21410188576855244
34
+ 2023-08-14,TSLA,-0.02790720470006184
35
+ 2023-08-13,TSLA,0.10277777777777776
36
+ 2023-08-12,TSLA,0.17281746031746034
37
+ 2023-08-11,TSLA,0.04555654709163481
38
+ 2023-06-25,TSLA,0.15089285714285716
39
+ 2023-06-24,TSLA,0.15306291916732256
40
+ 2023-06-23,TSLA,0.032385392385392384
41
+ 2023-06-22,TSLA,0.03318806841173364
42
+ 2023-05-05,TSLA,0.011805555555555555
43
+ 2023-05-04,TSLA,0.108013468013468
44
+ 2023-05-03,TSLA,0.14398358585858587
45
+ 2023-05-02,TSLA,0.10035511363636364
46
+ 2023-05-01,TSLA,0.24118326118326117
47
+ 2023-03-15,TSLA,0.20988057040998218
48
+ 2023-03-14,TSLA,0.0987584175084175
49
+ 2023-03-13,TSLA,0.17807920474587144
50
+ 2023-03-12,TSLA,0.0
51
+ 2023-03-11,TSLA,-0.01906565656565656
 
 
 
 
52
  2023-03-10,TSLA,0.13166666666666668
53
+ 2023-03-09,TSLA,0.4666666666666666
54
+ 2023-01-23,TSLA,0.08057381575900094
55
+ 2023-01-22,TSLA,0.04001262626262628
56
+ 2023-01-21,TSLA,0.07098965848965849
57
+ 2023-01-20,TSLA,0.13572390572390572
58
+ 2022-12-03,TSLA,0.0918560606060606
59
+ 2022-12-02,TSLA,0.04070950208011612
60
+ 2022-12-01,TSLA,0.0607790404040404
61
+ 2022-11-30,TSLA,0.0797694112845628
62
+ 2022-10-13,TSLA,0.11502750721500721
63
+ 2022-10-12,TSLA,0.03296728271728271
64
+ 2022-10-11,TSLA,0.09164985929353747
65
+ 2022-10-10,TSLA,0.10815323366160685
66
+ 2022-08-23,TSLA,0.11432291666666666
67
+ 2022-08-22,TSLA,0.10026435574229692
68
+ 2022-08-21,TSLA,0.1024592731829574
69
+ 2022-08-20,TSLA,0.13976190476190475
70
+ 2022-08-19,TSLA,0.15064935064935064
71
+ 2022-07-03,TSLA,-0.375
72
+ 2022-07-02,TSLA,0.03766666666666667
73
+ 2022-07-01,TSLA,0.17883820346320348
74
+ 2022-06-30,TSLA,0.08432771593485879
75
+ 2022-06-29,TSLA,0.0763806216931217
 
 
Stocks news prediction/Notebooks/news_articles_ema.csv CHANGED
@@ -1,76 +1,75 @@
1
  date,ticker,sentiment,exp_mean_7_days
2
- 2024-05-08,TSLA,0.010694444444444444,0.010694444444444444
3
- 2024-05-07,TSLA,0.03277777777777777,0.016215277777777773
4
- 2024-05-06,TSLA,0.1524924570379116,0.05028457259281123
5
- 2024-05-05,TSLA,0.036190476190476197,0.046761048492227474
6
- 2024-05-04,TSLA,0.06266489297739299,0.050737009613518846
7
- 2024-05-03,TSLA,0.027797749388658477,0.045002194557303754
8
- 2024-05-02,TSLA,0.04597474747474748,0.045245332786664684
9
- 2024-04-21,TSLA,0.19239861948567305,0.08203365446141678
10
- 2024-04-20,TSLA,0.015277777777777765,0.06534468529050702
11
- 2024-04-19,TSLA,0.10501033866418481,0.07526109863392647
12
- 2024-04-18,TSLA,0.1443942393913676,0.09254438382328675
13
- 2024-04-17,TSLA,0.05556939178833589,0.08330063581454902
14
- 2024-03-01,TSLA,0.0673826751951752,0.07932114565970556
15
- 2024-02-29,TSLA,0.1382677639820497,0.0940578002402916
16
- 2024-02-28,TSLA,0.07483175986443961,0.08925129014632861
17
- 2024-02-27,TSLA,0.10612244897959182,0.09346907985464442
18
- 2024-01-10,TSLA,0.15686835891381345,0.10931889961943668
19
- 2024-01-09,TSLA,0.2049332611832612,0.1332224900103928
20
- 2024-01-08,TSLA,0.10464797355422356,0.1260788608963505
21
- 2024-01-07,TSLA,0.09936317540484207,0.1193999395234734
22
- 2024-01-06,TSLA,0.4296943796943797,0.19697354956619997
23
- 2023-11-20,TSLA,0.1286308621933622,0.17988787772299053
24
- 2023-11-19,TSLA,0.28611111111111115,0.20644368607002067
25
- 2023-11-18,TSLA,-0.007575757575757592,0.1529388251585761
26
- 2023-11-17,TSLA,-0.12257936507936508,0.0840592775990908
27
- 2023-11-16,TSLA,0.032539682539682535,0.07117937883423874
28
- 2023-11-15,TSLA,0.2286113987682615,0.11053738381774442
29
- 2023-09-30,TSLA,0.03125996810207336,0.09071802988882666
30
- 2023-09-29,TSLA,0.13201757645206844,0.1010429165296371
31
- 2023-09-28,TSLA,0.020467534614856047,0.08089907105094184
32
- 2023-09-27,TSLA,0.1685227272727273,0.10280498510638819
33
- 2023-08-10,TSLA,0.1457654999321666,0.11354511381283279
34
- 2023-08-09,TSLA,0.2040932311621967,0.13618214315017377
35
- 2023-08-08,TSLA,0.09198456790123458,0.12513274933793897
36
- 2023-08-07,TSLA,0.024046438410074776,0.09986117160597292
37
- 2023-08-06,TSLA,0.5,0.1998958787044797
38
- 2023-08-05,TSLA,0.09166666666666667,0.17283857569502645
39
- 2023-08-04,TSLA,0.0884226135817045,0.15173458516669597
40
- 2023-06-20,TSLA,0.1742147348116736,0.15735462257794036
41
- 2023-06-19,TSLA,0.05943156637601082,0.13287385852745798
42
- 2023-06-18,TSLA,0.18055555555555555,0.14479428278448236
43
- 2023-06-17,TSLA,0.11944444444444444,0.1384568231994729
44
- 2023-06-16,TSLA,-0.0005538579067990851,0.1037041529229049
45
- 2023-06-15,TSLA,0.20928030303030304,0.13009819044975443
46
- 2023-04-30,TSLA,-0.006388888888888886,0.0959764206150936
47
- 2023-04-29,TSLA,-0.07021604938271606,0.05442830311564118
48
- 2023-04-28,TSLA,-0.035103114478114476,0.03204544871720227
49
- 2023-04-27,TSLA,0.14129647667147668,0.05935820570577087
50
- 2023-04-26,TSLA,0.10337159333967845,0.07036155261424776
51
- 2023-03-10,TSLA,0.13166666666666668,0.08568783112735248
52
- 2023-03-09,TSLA,0.1600571961739241,0.1042801723889954
53
- 2023-03-08,TSLA,0.11654703537744476,0.10734688813610774
54
- 2023-03-07,TSLA,-0.030867153679653692,0.07279337768216738
55
- 2023-01-18,TSLA,0.09678851484772537,0.07879216197355689
56
- 2023-01-17,TSLA,0.10135499338624339,0.08443286982672851
57
- 2023-01-16,TSLA,0.1377056277056277,0.09775105929645331
58
- 2022-11-28,TSLA,0.046414530685364025,0.08491692714368099
59
- 2022-11-27,TSLA,0.0,0.06368769535776074
60
- 2022-11-26,TSLA,0.16136363636363638,0.08810668060922965
61
- 2022-11-25,TSLA,0.1022067775974026,0.09163170485627288
62
- 2022-11-24,TSLA,0.08333333333333333,0.08955711197553799
63
- 2022-11-23,TSLA,0.15424622414622416,0.10572939001820952
64
- 2022-10-08,TSLA,0.10722853535353534,0.10610417635204097
65
- 2022-10-07,TSLA,0.156965109993722,0.11881940976246125
66
- 2022-10-06,TSLA,0.11721946354299295,0.11841942320759417
67
- 2022-10-05,TSLA,0.053409090909090906,0.10216684013296835
68
- 2022-08-18,TSLA,0.0943373359444788,0.10020946408584597
69
- 2022-08-17,TSLA,0.1733991702741703,0.11850689063292705
70
- 2022-08-16,TSLA,0.01159855130249867,0.09177980580031994
71
- 2022-08-15,TSLA,-0.017187499999999998,0.06453797935023994
72
- 2022-06-28,TSLA,0.12629822576413485,0.07997804095371366
73
- 2022-06-27,TSLA,0.1501262025012025,0.09751508134058587
74
- 2022-06-26,TSLA,0.0005555555555555545,0.07327519989432829
75
- 2022-06-25,TSLA,0.25,0.11745639992074622
76
- 2022-06-24,TSLA,-0.08422373081463991,0.0670363672368997
 
1
  date,ticker,sentiment,exp_mean_7_days
2
+ 2024-05-13,TSLA,0.11544328870717759,0.11544328870717759
3
+ 2024-05-12,TSLA,0.037500000000000006,0.0959574665303832
4
+ 2024-05-11,TSLA,0.09999999999999999,0.0969680998977874
5
+ 2024-05-10,TSLA,0.069649648541694,0.09013848705876404
6
+ 2024-05-09,TSLA,-0.03125,0.05979136529407303
7
+ 2024-05-08,TSLA,0.010694444444444444,0.047517135081665884
8
+ 2024-05-07,TSLA,0.03277777777777777,0.043832295755693855
9
+ 2024-05-06,TSLA,0.1334894398530762,0.06624658178003945
10
+ 2024-04-26,TSLA,0.19857909580131802,0.09932971028535909
11
+ 2024-04-25,TSLA,0.0994785654160654,0.09936692406803566
12
+ 2024-04-24,TSLA,-0.008469729154287984,0.07240776076245474
13
+ 2024-04-23,TSLA,0.22602178458796107,0.11081126671883132
14
+ 2024-03-06,TSLA,0.15092336662379766,0.1208392916950729
15
+ 2024-03-05,TSLA,0.14880197389756214,0.1278299622456952
16
+ 2024-03-04,TSLA,0.02845765345765346,0.10298688504868476
17
+ 2024-01-15,TSLA,0.13911845730027547,0.11201977811158245
18
+ 2024-01-14,TSLA,0.13266666666666668,0.11718150025035351
19
+ 2024-01-13,TSLA,0.14180555555555555,0.12333751407665403
20
+ 2024-01-12,TSLA,0.09484551447656285,0.11621451417663123
21
+ 2024-01-11,TSLA,0.04215784215784215,0.09770034617193396
22
+ 2024-01-10,TSLA,0.05081168831168831,0.08597818170687255
23
+ 2023-11-25,TSLA,0.09511784511784512,0.08826309755961569
24
+ 2023-11-24,TSLA,-0.08042929292929292,0.046089999937388534
25
+ 2023-11-23,TSLA,0.19281726579520697,0.08277181640184314
26
+ 2023-11-22,TSLA,0.13482358069490422,0.09578475747510841
27
+ 2023-11-21,TSLA,0.31475243506493505,0.15052667687256507
28
+ 2023-11-20,TSLA,0.11935703463203465,0.14273426631243247
29
+ 2023-10-05,TSLA,0.06818091630591631,0.12409592881080343
30
+ 2023-10-04,TSLA,0.10093246010525424,0.11830506163441613
31
+ 2023-10-03,TSLA,0.11860375194784238,0.1183797342127727
32
+ 2023-10-02,TSLA,-0.0008466045738773006,0.0885731495161102
33
+ 2023-08-15,TSLA,0.21410188576855244,0.11995533357922077
34
+ 2023-08-14,TSLA,-0.02790720470006184,0.0829896990094001
35
+ 2023-08-13,TSLA,0.10277777777777776,0.08793671870149451
36
+ 2023-08-12,TSLA,0.17281746031746034,0.10915690410548598
37
+ 2023-08-11,TSLA,0.04555654709163481,0.0932568148520232
38
+ 2023-06-25,TSLA,0.15089285714285716,0.10766582542473169
39
+ 2023-06-24,TSLA,0.15306291916732256,0.1190150988603794
40
+ 2023-06-23,TSLA,0.032385392385392384,0.09735767224163265
41
+ 2023-06-22,TSLA,0.03318806841173364,0.0813152712841579
42
+ 2023-05-05,TSLA,0.011805555555555555,0.06393784235200732
43
+ 2023-05-04,TSLA,0.108013468013468,0.07495674876737249
44
+ 2023-05-03,TSLA,0.14398358585858587,0.09221345804017583
45
+ 2023-05-02,TSLA,0.10035511363636364,0.09424887193922278
46
+ 2023-05-01,TSLA,0.24118326118326117,0.13098246925023238
47
+ 2023-03-15,TSLA,0.20988057040998218,0.15070699454016984
48
+ 2023-03-14,TSLA,0.0987584175084175,0.13771985028223174
49
+ 2023-03-13,TSLA,0.17807920474587144,0.14780968889814167
50
+ 2023-03-12,TSLA,0.0,0.11085726667360625
51
+ 2023-03-11,TSLA,-0.01906565656565656,0.07837653586379055
52
+ 2023-03-10,TSLA,0.13166666666666668,0.09169906856450957
53
+ 2023-03-09,TSLA,0.4666666666666666,0.18544096809004884
54
+ 2023-01-23,TSLA,0.08057381575900094,0.15922418000728686
55
+ 2023-01-22,TSLA,0.04001262626262628,0.1294212915711217
56
+ 2023-01-21,TSLA,0.07098965848965849,0.1148133833007559
57
+ 2023-01-20,TSLA,0.13572390572390572,0.12004101390654334
58
+ 2022-12-03,TSLA,0.0918560606060606,0.11299477558142267
59
+ 2022-12-02,TSLA,0.04070950208011612,0.09492345720609602
60
+ 2022-12-01,TSLA,0.0607790404040404,0.08638735300558212
61
+ 2022-11-30,TSLA,0.0797694112845628,0.0847328675753273
62
+ 2022-10-13,TSLA,0.11502750721500721,0.09230652748524729
63
+ 2022-10-12,TSLA,0.03296728271728271,0.07747171629325615
64
+ 2022-10-11,TSLA,0.09164985929353747,0.08101625204332648
65
+ 2022-10-10,TSLA,0.10815323366160685,0.08780049744789657
66
+ 2022-08-23,TSLA,0.11432291666666666,0.09443110225258909
67
+ 2022-08-22,TSLA,0.10026435574229692,0.09588941562501604
68
+ 2022-08-21,TSLA,0.1024592731829574,0.09753188001450137
69
+ 2022-08-20,TSLA,0.13976190476190475,0.10808938620135222
70
+ 2022-08-19,TSLA,0.15064935064935064,0.11872937731335183
71
+ 2022-07-03,TSLA,-0.375,-0.004702967014986126
72
+ 2022-07-02,TSLA,0.03766666666666667,0.005889441405427073
73
+ 2022-07-01,TSLA,0.17883820346320348,0.049126631919871176
74
+ 2022-06-30,TSLA,0.08432771593485879,0.05792690292361807
75
+ 2022-06-29,TSLA,0.0763806216931217,0.06254033261599398
 
Stocks news prediction/Notebooks/stock_prediction_model/stock_prediction_model.pkl CHANGED
Binary files a/Stocks news prediction/Notebooks/stock_prediction_model/stock_prediction_model.pkl and b/Stocks news prediction/Notebooks/stock_prediction_model/stock_prediction_model.pkl differ