mtzeve commited on
Commit
92a94bb
1 Parent(s): 33aa194

Updated sheesh

Browse files
TSLA_stock_price.csv CHANGED
@@ -1,4 +1,7 @@
1
  date,1. open,2. high,3. low,4. close,5. volume
 
 
 
2
  2024-04-24,162.84,167.97,157.51,162.13,181178020.0
3
  2024-04-23,143.33,147.26,141.11,144.68,124545104.0
4
  2024-04-22,140.56,144.44,138.8025,142.05,107097564.0
 
1
  date,1. open,2. high,3. low,4. close,5. volume
2
+ 2024-04-29,188.42,198.87,184.54,194.05,243869678.0
3
+ 2024-04-26,168.85,172.12,166.37,168.29,109815725.0
4
+ 2024-04-25,158.96,170.88,158.36,170.18,126427521.0
5
  2024-04-24,162.84,167.97,157.51,162.13,181178020.0
6
  2024-04-23,143.33,147.26,141.11,144.68,124545104.0
7
  2024-04-22,140.56,144.44,138.8025,142.05,107097564.0
__pycache__/feature_engineering.cpython-311.pyc CHANGED
Binary files a/__pycache__/feature_engineering.cpython-311.pyc and b/__pycache__/feature_engineering.cpython-311.pyc differ
 
feature_view.ipynb ADDED
File without changes
news_articles.csv ADDED
The diff for this file is too large to render. See raw diff
 
news_exp.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
news_experimenting.ipynb DELETED
@@ -1,410 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 31,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import requests\n",
10
- "import pandas as pd\n",
11
- "from datetime import datetime\n",
12
- "from textblob import TextBlob"
13
- ]
14
- },
15
- {
16
- "cell_type": "code",
17
- "execution_count": 41,
18
- "metadata": {},
19
- "outputs": [],
20
- "source": [
21
- "from dotenv import load_dotenv\n",
22
- "import os"
23
- ]
24
- },
25
- {
26
- "cell_type": "code",
27
- "execution_count": 43,
28
- "metadata": {},
29
- "outputs": [],
30
- "source": [
31
- "load_dotenv()\n",
32
- "\n",
33
- "def fetch_tesla_news(api_key, start_date, end_date):\n",
34
- " url = \"https://api.marketaux.com/v1/news/all?symbols=TSLA&filter_entities=true&language=en&api_token=iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\" # Modify this based on the exact endpoint you need\n",
35
- " headers = {\n",
36
- " \"x-api-key\": api_key\n",
37
- " }\n",
38
- " # Since each page corresponds to a single request, limit the number of pages to 100\n",
39
- " for page in range(1, 101): # start from page 1 to page 100\n",
40
- " params = {\n",
41
- " \"tickers\": \"TSLA\",\n",
42
- " \"filter_entities\": \"true\",\n",
43
- " \"language\": \"en\",\n",
44
- " \"from\": start_date,\n",
45
- " \"to\": end_date,\n",
46
- " \"page\": page\n",
47
- " }\n",
48
- " \n",
49
- " response = requests.get(url, headers=headers, params=params)\n",
50
- " if response.status_code == 200:\n",
51
- " return pd.json_normalize(response.json()['data'])\n",
52
- " else:\n",
53
- " raise Exception(f\"Failed to fetch data: {response.text}\")"
54
- ]
55
- },
56
- {
57
- "cell_type": "code",
58
- "execution_count": 44,
59
- "metadata": {},
60
- "outputs": [
61
- {
62
- "name": "stdout",
63
- "output_type": "stream",
64
- "text": [
65
- " uuid \\\n",
66
- "0 99ce54fb-eb5b-4c9d-be66-6a56aa9f6de3 \n",
67
- "1 9286f959-f5ea-4b95-88e8-7ba99a472ca5 \n",
68
- "2 92e41393-f923-4c98-ba38-5bab89fedabd \n",
69
- "\n",
70
- " title \\\n",
71
- "0 S&P 500 gains as investors digest positive ear... \n",
72
- "1 3 Defensive Stocks to Protect Your Portfolio i... \n",
73
- "2 Tesla to lay off more than 3,000 employees in ... \n",
74
- "\n",
75
- " description keywords \\\n",
76
- "0 Benchmark S&P 500 rose on\\nTuesday following p... Markets \n",
77
- "1 Stocks Analysis by The Tokenist (Timothy Fries... \n",
78
- "2 Tesla is set to cut 3,332\\njobs in California,... Markets \n",
79
- "\n",
80
- " snippet \\\n",
81
- "0 * Tesla set to kick off Magnificent Seven earn... \n",
82
- "1 After the Iran-Israel situation fizzled out fo... \n",
83
- "2 Tesla, Inc. designs, builds, and sells electri... \n",
84
- "\n",
85
- " url \\\n",
86
- "0 https://www.marketscreener.com/quote/stock/TES... \n",
87
- "1 https://www.investing.com/analysis/3-defensive... \n",
88
- "2 https://www.marketscreener.com/quote/stock/TES... \n",
89
- "\n",
90
- " image_url language \\\n",
91
- "0 https://img.zonebourse.com/reuters/2016-04/201... en \n",
92
- "1 https://i-invdn-com.investing.com/redesign/ima... en \n",
93
- "2 https://www.marketscreener.com/images/reuters/... en \n",
94
- "\n",
95
- " published_at source relevance_score \\\n",
96
- "0 2024-04-23T19:10:43.000000Z marketscreener.com None \n",
97
- "1 2024-04-23T19:06:00.000000Z investing.com None \n",
98
- "2 2024-04-23T18:50:54.000000Z marketscreener.com None \n",
99
- "\n",
100
- " entities \\\n",
101
- "0 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
102
- "1 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
103
- "2 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
104
- "\n",
105
- " similar \n",
106
- "0 [] \n",
107
- "1 [] \n",
108
- "2 [{'uuid': 'df8f1cf3-89ca-4430-975a-131b9c31245... \n"
109
- ]
110
- }
111
- ],
112
- "source": [
113
- "api_key = os.environ.get('news_api')\n",
114
- "tesla_news_df = fetch_tesla_news(api_key, \"2017-01-01\", datetime.now().strftime('%Y-%m-%d'))\n",
115
- "print(tesla_news_df.head())"
116
- ]
117
- },
118
- {
119
- "cell_type": "code",
120
- "execution_count": 40,
121
- "metadata": {},
122
- "outputs": [
123
- {
124
- "data": {
125
- "text/html": [
126
- "<div>\n",
127
- "<style scoped>\n",
128
- " .dataframe tbody tr th:only-of-type {\n",
129
- " vertical-align: middle;\n",
130
- " }\n",
131
- "\n",
132
- " .dataframe tbody tr th {\n",
133
- " vertical-align: top;\n",
134
- " }\n",
135
- "\n",
136
- " .dataframe thead th {\n",
137
- " text-align: right;\n",
138
- " }\n",
139
- "</style>\n",
140
- "<table border=\"1\" class=\"dataframe\">\n",
141
- " <thead>\n",
142
- " <tr style=\"text-align: right;\">\n",
143
- " <th></th>\n",
144
- " <th>uuid</th>\n",
145
- " <th>title</th>\n",
146
- " <th>description</th>\n",
147
- " <th>keywords</th>\n",
148
- " <th>snippet</th>\n",
149
- " <th>url</th>\n",
150
- " <th>image_url</th>\n",
151
- " <th>language</th>\n",
152
- " <th>published_at</th>\n",
153
- " <th>source</th>\n",
154
- " <th>relevance_score</th>\n",
155
- " <th>entities</th>\n",
156
- " <th>similar</th>\n",
157
- " </tr>\n",
158
- " </thead>\n",
159
- " <tbody>\n",
160
- " <tr>\n",
161
- " <th>0</th>\n",
162
- " <td>daf76e3e-caea-4c92-a461-6b3132655788</td>\n",
163
- " <td>Stock market today: US futures climb as earnin...</td>\n",
164
- " <td>The wait for Tesla results is on as investors ...</td>\n",
165
- " <td></td>\n",
166
- " <td>US stocks climbed on Tuesday, on track for fur...</td>\n",
167
- " <td>https://finance.yahoo.com/news/stock-market-to...</td>\n",
168
- " <td>https://s.yimg.com/ny/api/res/1.2/mqjC0VUO61dY...</td>\n",
169
- " <td>en</td>\n",
170
- " <td>2024-04-23T11:22:53.000000Z</td>\n",
171
- " <td>finance.yahoo.com</td>\n",
172
- " <td>None</td>\n",
173
- " <td>[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...</td>\n",
174
- " <td>[{'uuid': '01cd65de-530d-407a-96c9-5b3359e98a0...</td>\n",
175
- " </tr>\n",
176
- " <tr>\n",
177
- " <th>1</th>\n",
178
- " <td>8dab10ca-5b23-465a-aa86-360bc987a774</td>\n",
179
- " <td>5 things to know before the stock market opens...</td>\n",
180
- " <td>Here are the most important news items that in...</td>\n",
181
- " <td>Investment strategy, Economy, Markets, Busines...</td>\n",
182
- " <td>In this article CPRI Follow your favorite stoc...</td>\n",
183
- " <td>https://www.cnbc.com/2024/04/23/5-things-to-kn...</td>\n",
184
- " <td>https://image.cnbcfm.com/api/v1/image/10692170...</td>\n",
185
- " <td>en</td>\n",
186
- " <td>2024-04-23T11:16:00.000000Z</td>\n",
187
- " <td>cnbc.com</td>\n",
188
- " <td>None</td>\n",
189
- " <td>[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...</td>\n",
190
- " <td>[]</td>\n",
191
- " </tr>\n",
192
- " <tr>\n",
193
- " <th>2</th>\n",
194
- " <td>b8c381b9-4187-433e-ad15-cecc9d227b13</td>\n",
195
- " <td>Wall Street Breakfast Podcast: UNH: Personal D...</td>\n",
196
- " <td>UnitedHealth confirms personal data compromise...</td>\n",
197
- " <td></td>\n",
198
- " <td>JHVEPhoto/iStock Editorial via Getty Images\\n\\...</td>\n",
199
- " <td>https://seekingalpha.com/article/4685243-wall-...</td>\n",
200
- " <td>https://static.seekingalpha.com/cdn/s3/uploads...</td>\n",
201
- " <td>en</td>\n",
202
- " <td>2024-04-23T11:00:00.000000Z</td>\n",
203
- " <td>seekingalpha.com</td>\n",
204
- " <td>None</td>\n",
205
- " <td>[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...</td>\n",
206
- " <td>[]</td>\n",
207
- " </tr>\n",
208
- " </tbody>\n",
209
- "</table>\n",
210
- "</div>"
211
- ],
212
- "text/plain": [
213
- " uuid \\\n",
214
- "0 daf76e3e-caea-4c92-a461-6b3132655788 \n",
215
- "1 8dab10ca-5b23-465a-aa86-360bc987a774 \n",
216
- "2 b8c381b9-4187-433e-ad15-cecc9d227b13 \n",
217
- "\n",
218
- " title \\\n",
219
- "0 Stock market today: US futures climb as earnin... \n",
220
- "1 5 things to know before the stock market opens... \n",
221
- "2 Wall Street Breakfast Podcast: UNH: Personal D... \n",
222
- "\n",
223
- " description \\\n",
224
- "0 The wait for Tesla results is on as investors ... \n",
225
- "1 Here are the most important news items that in... \n",
226
- "2 UnitedHealth confirms personal data compromise... \n",
227
- "\n",
228
- " keywords \\\n",
229
- "0 \n",
230
- "1 Investment strategy, Economy, Markets, Busines... \n",
231
- "2 \n",
232
- "\n",
233
- " snippet \\\n",
234
- "0 US stocks climbed on Tuesday, on track for fur... \n",
235
- "1 In this article CPRI Follow your favorite stoc... \n",
236
- "2 JHVEPhoto/iStock Editorial via Getty Images\\n\\... \n",
237
- "\n",
238
- " url \\\n",
239
- "0 https://finance.yahoo.com/news/stock-market-to... \n",
240
- "1 https://www.cnbc.com/2024/04/23/5-things-to-kn... \n",
241
- "2 https://seekingalpha.com/article/4685243-wall-... \n",
242
- "\n",
243
- " image_url language \\\n",
244
- "0 https://s.yimg.com/ny/api/res/1.2/mqjC0VUO61dY... en \n",
245
- "1 https://image.cnbcfm.com/api/v1/image/10692170... en \n",
246
- "2 https://static.seekingalpha.com/cdn/s3/uploads... en \n",
247
- "\n",
248
- " published_at source relevance_score \\\n",
249
- "0 2024-04-23T11:22:53.000000Z finance.yahoo.com None \n",
250
- "1 2024-04-23T11:16:00.000000Z cnbc.com None \n",
251
- "2 2024-04-23T11:00:00.000000Z seekingalpha.com None \n",
252
- "\n",
253
- " entities \\\n",
254
- "0 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
255
- "1 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
256
- "2 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
257
- "\n",
258
- " similar \n",
259
- "0 [{'uuid': '01cd65de-530d-407a-96c9-5b3359e98a0... \n",
260
- "1 [] \n",
261
- "2 [] "
262
- ]
263
- },
264
- "execution_count": 40,
265
- "metadata": {},
266
- "output_type": "execute_result"
267
- }
268
- ],
269
- "source": [
270
- "tesla_news_df"
271
- ]
272
- },
273
- {
274
- "cell_type": "code",
275
- "execution_count": 30,
276
- "metadata": {},
277
- "outputs": [
278
- {
279
- "name": "stdout",
280
- "output_type": "stream",
281
- "text": [
282
- "uuid 0\n",
283
- "title 0\n",
284
- "description 0\n",
285
- "keywords 0\n",
286
- "snippet 0\n",
287
- "url 0\n",
288
- "image_url 0\n",
289
- "language 0\n",
290
- "published_at 0\n",
291
- "source 0\n",
292
- "relevance_score 3\n",
293
- "entities 0\n",
294
- "similar 0\n",
295
- "dtype: int64\n"
296
- ]
297
- }
298
- ],
299
- "source": [
300
- "# Clean text data, Check for any missing values or inconsistencies in the data\n",
301
- "tesla_news_df['description'] = tesla_news_df['description'].apply(lambda x: x.lower().replace('[^\\w\\s]', ''))\n",
302
- "\n",
303
- "# Check for any missing values\n",
304
- "print(tesla_news_df.isnull().sum())\n"
305
- ]
306
- },
307
- {
308
- "cell_type": "code",
309
- "execution_count": 32,
310
- "metadata": {},
311
- "outputs": [
312
- {
313
- "name": "stdout",
314
- "output_type": "stream",
315
- "text": [
316
- " title sentiment\n",
317
- "0 Wall Street Breakfast: What Moved Markets 0.197443\n",
318
- "1 1 \"Magnificent Seven\" Stock With 1,234% Upside... 1.000000\n",
319
- "2 Market Today: Tech Giants Reignite AI Craze, A... -0.024242\n"
320
- ]
321
- }
322
- ],
323
- "source": [
324
- "# Sentiment analysis on descriptions\n",
325
- "tesla_news_df['sentiment'] = tesla_news_df['description'].apply(lambda text: TextBlob(text).sentiment.polarity)\n",
326
- "\n",
327
- "print(tesla_news_df[['title', 'sentiment']])\n"
328
- ]
329
- },
330
- {
331
- "cell_type": "code",
332
- "execution_count": 35,
333
- "metadata": {},
334
- "outputs": [],
335
- "source": [
336
- "# Example: Counting the number of articles per day\n",
337
- "tesla_news_df['published_at'] = pd.to_datetime(tesla_news_df['published_at']) # Convert to datetime\n",
338
- "tesla_news_df['date'] = tesla_news_df['published_at'].dt.date\n",
339
- "daily_news_count = tesla_news_df.groupby('date').size()\n"
340
- ]
341
- },
342
- {
343
- "cell_type": "code",
344
- "execution_count": 37,
345
- "metadata": {},
346
- "outputs": [
347
- {
348
- "name": "stdout",
349
- "output_type": "stream",
350
- "text": [
351
- "<class 'pandas.core.frame.DataFrame'>\n",
352
- "RangeIndex: 3 entries, 0 to 2\n",
353
- "Data columns (total 15 columns):\n",
354
- " # Column Non-Null Count Dtype \n",
355
- "--- ------ -------------- ----- \n",
356
- " 0 uuid 3 non-null object \n",
357
- " 1 title 3 non-null object \n",
358
- " 2 description 3 non-null object \n",
359
- " 3 keywords 3 non-null object \n",
360
- " 4 snippet 3 non-null object \n",
361
- " 5 url 3 non-null object \n",
362
- " 6 image_url 3 non-null object \n",
363
- " 7 language 3 non-null object \n",
364
- " 8 published_at 3 non-null datetime64[ns, UTC]\n",
365
- " 9 source 3 non-null object \n",
366
- " 10 relevance_score 0 non-null object \n",
367
- " 11 entities 3 non-null object \n",
368
- " 12 similar 3 non-null object \n",
369
- " 13 sentiment 3 non-null float64 \n",
370
- " 14 date 3 non-null object \n",
371
- "dtypes: datetime64[ns, UTC](1), float64(1), object(13)\n",
372
- "memory usage: 492.0+ bytes\n"
373
- ]
374
- }
375
- ],
376
- "source": [
377
- "tesla_news_df.info()"
378
- ]
379
- },
380
- {
381
- "cell_type": "code",
382
- "execution_count": null,
383
- "metadata": {},
384
- "outputs": [],
385
- "source": []
386
- }
387
- ],
388
- "metadata": {
389
- "kernelspec": {
390
- "display_name": "base",
391
- "language": "python",
392
- "name": "python3"
393
- },
394
- "language_info": {
395
- "codemirror_mode": {
396
- "name": "ipython",
397
- "version": 3
398
- },
399
- "file_extension": ".py",
400
- "mimetype": "text/x-python",
401
- "name": "python",
402
- "nbconvert_exporter": "python",
403
- "pygments_lexer": "ipython3",
404
- "version": "3.11.4"
405
- },
406
- "orig_nbformat": 4
407
- },
408
- "nbformat": 4,
409
- "nbformat_minor": 2
410
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
news_experimenting1.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
tesla_articles.json DELETED
The diff for this file is too large to render. See raw diff