Spaces:
No application file
No application file
check commit
Browse files
- feature_engineering.py +1 -1
- news_experimenting.ipynb +255 -0
- TSLA_news.csv → news_experimenting1.ipynb +0 -0
- tesla_articles.json +0 -0
feature_engineering.py
CHANGED
@@ -21,7 +21,7 @@ def getNews(api_key,endpoint,ticker,from_date,to_date,num=1000):
|
|
21 |
response = requests.get(endpoint, params=params)
|
22 |
|
23 |
# Print the response from the API
|
24 |
-
|
25 |
|
26 |
#Return a Pandas dataframe from the response
|
27 |
return pd.DataFrame(response.json())
|
|
|
21 |
response = requests.get(endpoint, params=params)
|
22 |
|
23 |
# Print the response from the API
|
24 |
+
print(response.json())
|
25 |
|
26 |
#Return a Pandas dataframe from the response
|
27 |
return pd.DataFrame(response.json())
|
news_experimenting.ipynb
ADDED
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 31,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import requests\n",
|
10 |
+
"import pandas as pd\n",
|
11 |
+
"from datetime import datetime\n",
|
12 |
+
"from textblob import TextBlob"
|
13 |
+
]
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"cell_type": "code",
|
17 |
+
"execution_count": 38,
|
18 |
+
"metadata": {},
|
19 |
+
"outputs": [],
|
20 |
+
"source": [
|
21 |
+
"def fetch_tesla_news(api_key, start_date, end_date):\n",
|
22 |
+
" url = \"https://api.marketaux.com/v1/news/all?symbols=TSLA&filter_entities=true&language=en&api_token=iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\" # Modify this based on the exact endpoint you need\n",
|
23 |
+
" headers = {\n",
|
24 |
+
" \"x-api-key\": api_key\n",
|
25 |
+
" }\n",
|
26 |
+
" # Since each page corresponds to a single request, limit the number of pages to 100\n",
|
27 |
+
" for page in range(1, 101): # start from page 1 to page 100\n",
|
28 |
+
" params = {\n",
|
29 |
+
" \"tickers\": \"TSLA\",\n",
|
30 |
+
" \"filter_entities\": \"true\",\n",
|
31 |
+
" \"language\": \"en\",\n",
|
32 |
+
" \"from\": start_date,\n",
|
33 |
+
" \"to\": end_date,\n",
|
34 |
+
" \"page\": page\n",
|
35 |
+
" }\n",
|
36 |
+
" \n",
|
37 |
+
" response = requests.get(url, headers=headers, params=params)\n",
|
38 |
+
" if response.status_code == 200:\n",
|
39 |
+
" return pd.json_normalize(response.json()['data'])\n",
|
40 |
+
" else:\n",
|
41 |
+
" raise Exception(f\"Failed to fetch data: {response.text}\")"
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cell_type": "code",
|
46 |
+
"execution_count": 39,
|
47 |
+
"metadata": {},
|
48 |
+
"outputs": [
|
49 |
+
{
|
50 |
+
"name": "stdout",
|
51 |
+
"output_type": "stream",
|
52 |
+
"text": [
|
53 |
+
" uuid \\\n",
|
54 |
+
"0 daf76e3e-caea-4c92-a461-6b3132655788 \n",
|
55 |
+
"1 8dab10ca-5b23-465a-aa86-360bc987a774 \n",
|
56 |
+
"2 b8c381b9-4187-433e-ad15-cecc9d227b13 \n",
|
57 |
+
"\n",
|
58 |
+
" title \\\n",
|
59 |
+
"0 Stock market today: US futures climb as earnin... \n",
|
60 |
+
"1 5 things to know before the stock market opens... \n",
|
61 |
+
"2 Wall Street Breakfast Podcast: UNH: Personal D... \n",
|
62 |
+
"\n",
|
63 |
+
" description \\\n",
|
64 |
+
"0 The wait for Tesla results is on as investors ... \n",
|
65 |
+
"1 Here are the most important news items that in... \n",
|
66 |
+
"2 UnitedHealth confirms personal data compromise... \n",
|
67 |
+
"\n",
|
68 |
+
" keywords \\\n",
|
69 |
+
"0 \n",
|
70 |
+
"1 Investment strategy, Economy, Markets, Busines... \n",
|
71 |
+
"2 \n",
|
72 |
+
"\n",
|
73 |
+
" snippet \\\n",
|
74 |
+
"0 US stocks climbed on Tuesday, on track for fur... \n",
|
75 |
+
"1 In this article CPRI Follow your favorite stoc... \n",
|
76 |
+
"2 JHVEPhoto/iStock Editorial via Getty Images\\n\\... \n",
|
77 |
+
"\n",
|
78 |
+
" url \\\n",
|
79 |
+
"0 https://finance.yahoo.com/news/stock-market-to... \n",
|
80 |
+
"1 https://www.cnbc.com/2024/04/23/5-things-to-kn... \n",
|
81 |
+
"2 https://seekingalpha.com/article/4685243-wall-... \n",
|
82 |
+
"\n",
|
83 |
+
" image_url language \\\n",
|
84 |
+
"0 https://s.yimg.com/ny/api/res/1.2/mqjC0VUO61dY... en \n",
|
85 |
+
"1 https://image.cnbcfm.com/api/v1/image/10692170... en \n",
|
86 |
+
"2 https://static.seekingalpha.com/cdn/s3/uploads... en \n",
|
87 |
+
"\n",
|
88 |
+
" published_at source relevance_score \\\n",
|
89 |
+
"0 2024-04-23T11:22:53.000000Z finance.yahoo.com None \n",
|
90 |
+
"1 2024-04-23T11:16:00.000000Z cnbc.com None \n",
|
91 |
+
"2 2024-04-23T11:00:00.000000Z seekingalpha.com None \n",
|
92 |
+
"\n",
|
93 |
+
" entities \\\n",
|
94 |
+
"0 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
|
95 |
+
"1 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
|
96 |
+
"2 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
|
97 |
+
"\n",
|
98 |
+
" similar \n",
|
99 |
+
"0 [{'uuid': '01cd65de-530d-407a-96c9-5b3359e98a0... \n",
|
100 |
+
"1 [] \n",
|
101 |
+
"2 [] \n"
|
102 |
+
]
|
103 |
+
}
|
104 |
+
],
|
105 |
+
"source": [
|
106 |
+
"api_key = \"iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\"\n",
|
107 |
+
"tesla_news_df = fetch_tesla_news(api_key, \"2017-01-01\", datetime.now().strftime('%Y-%m-%d'))\n",
|
108 |
+
"print(tesla_news_df.head())"
|
109 |
+
]
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"cell_type": "code",
|
113 |
+
"execution_count": null,
|
114 |
+
"metadata": {},
|
115 |
+
"outputs": [],
|
116 |
+
"source": []
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"cell_type": "code",
|
120 |
+
"execution_count": 30,
|
121 |
+
"metadata": {},
|
122 |
+
"outputs": [
|
123 |
+
{
|
124 |
+
"name": "stdout",
|
125 |
+
"output_type": "stream",
|
126 |
+
"text": [
|
127 |
+
"uuid 0\n",
|
128 |
+
"title 0\n",
|
129 |
+
"description 0\n",
|
130 |
+
"keywords 0\n",
|
131 |
+
"snippet 0\n",
|
132 |
+
"url 0\n",
|
133 |
+
"image_url 0\n",
|
134 |
+
"language 0\n",
|
135 |
+
"published_at 0\n",
|
136 |
+
"source 0\n",
|
137 |
+
"relevance_score 3\n",
|
138 |
+
"entities 0\n",
|
139 |
+
"similar 0\n",
|
140 |
+
"dtype: int64\n"
|
141 |
+
]
|
142 |
+
}
|
143 |
+
],
|
144 |
+
"source": [
|
145 |
+
"# Clean text data, Check for any missing values or inconsistencies in the data\n",
|
146 |
+
"tesla_news_df['description'] = tesla_news_df['description'].apply(lambda x: x.lower().replace('[^\\w\\s]', ''))\n",
|
147 |
+
"\n",
|
148 |
+
"# Check for any missing values\n",
|
149 |
+
"print(tesla_news_df.isnull().sum())\n"
|
150 |
+
]
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"cell_type": "code",
|
154 |
+
"execution_count": 32,
|
155 |
+
"metadata": {},
|
156 |
+
"outputs": [
|
157 |
+
{
|
158 |
+
"name": "stdout",
|
159 |
+
"output_type": "stream",
|
160 |
+
"text": [
|
161 |
+
" title sentiment\n",
|
162 |
+
"0 Wall Street Breakfast: What Moved Markets 0.197443\n",
|
163 |
+
"1 1 \"Magnificent Seven\" Stock With 1,234% Upside... 1.000000\n",
|
164 |
+
"2 Market Today: Tech Giants Reignite AI Craze, A... -0.024242\n"
|
165 |
+
]
|
166 |
+
}
|
167 |
+
],
|
168 |
+
"source": [
|
169 |
+
"# Sentiment analysis on descriptions\n",
|
170 |
+
"tesla_news_df['sentiment'] = tesla_news_df['description'].apply(lambda text: TextBlob(text).sentiment.polarity)\n",
|
171 |
+
"\n",
|
172 |
+
"print(tesla_news_df[['title', 'sentiment']])\n"
|
173 |
+
]
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"cell_type": "code",
|
177 |
+
"execution_count": 35,
|
178 |
+
"metadata": {},
|
179 |
+
"outputs": [],
|
180 |
+
"source": [
|
181 |
+
"# Example: Counting the number of articles per day\n",
|
182 |
+
"tesla_news_df['published_at'] = pd.to_datetime(tesla_news_df['published_at']) # Convert to datetime\n",
|
183 |
+
"tesla_news_df['date'] = tesla_news_df['published_at'].dt.date\n",
|
184 |
+
"daily_news_count = tesla_news_df.groupby('date').size()\n"
|
185 |
+
]
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"cell_type": "code",
|
189 |
+
"execution_count": 37,
|
190 |
+
"metadata": {},
|
191 |
+
"outputs": [
|
192 |
+
{
|
193 |
+
"name": "stdout",
|
194 |
+
"output_type": "stream",
|
195 |
+
"text": [
|
196 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
197 |
+
"RangeIndex: 3 entries, 0 to 2\n",
|
198 |
+
"Data columns (total 15 columns):\n",
|
199 |
+
" # Column Non-Null Count Dtype \n",
|
200 |
+
"--- ------ -------------- ----- \n",
|
201 |
+
" 0 uuid 3 non-null object \n",
|
202 |
+
" 1 title 3 non-null object \n",
|
203 |
+
" 2 description 3 non-null object \n",
|
204 |
+
" 3 keywords 3 non-null object \n",
|
205 |
+
" 4 snippet 3 non-null object \n",
|
206 |
+
" 5 url 3 non-null object \n",
|
207 |
+
" 6 image_url 3 non-null object \n",
|
208 |
+
" 7 language 3 non-null object \n",
|
209 |
+
" 8 published_at 3 non-null datetime64[ns, UTC]\n",
|
210 |
+
" 9 source 3 non-null object \n",
|
211 |
+
" 10 relevance_score 0 non-null object \n",
|
212 |
+
" 11 entities 3 non-null object \n",
|
213 |
+
" 12 similar 3 non-null object \n",
|
214 |
+
" 13 sentiment 3 non-null float64 \n",
|
215 |
+
" 14 date 3 non-null object \n",
|
216 |
+
"dtypes: datetime64[ns, UTC](1), float64(1), object(13)\n",
|
217 |
+
"memory usage: 492.0+ bytes\n"
|
218 |
+
]
|
219 |
+
}
|
220 |
+
],
|
221 |
+
"source": [
|
222 |
+
"tesla_news_df.info()"
|
223 |
+
]
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"cell_type": "code",
|
227 |
+
"execution_count": null,
|
228 |
+
"metadata": {},
|
229 |
+
"outputs": [],
|
230 |
+
"source": []
|
231 |
+
}
|
232 |
+
],
|
233 |
+
"metadata": {
|
234 |
+
"kernelspec": {
|
235 |
+
"display_name": "base",
|
236 |
+
"language": "python",
|
237 |
+
"name": "python3"
|
238 |
+
},
|
239 |
+
"language_info": {
|
240 |
+
"codemirror_mode": {
|
241 |
+
"name": "ipython",
|
242 |
+
"version": 3
|
243 |
+
},
|
244 |
+
"file_extension": ".py",
|
245 |
+
"mimetype": "text/x-python",
|
246 |
+
"name": "python",
|
247 |
+
"nbconvert_exporter": "python",
|
248 |
+
"pygments_lexer": "ipython3",
|
249 |
+
"version": "3.11.4"
|
250 |
+
},
|
251 |
+
"orig_nbformat": 4
|
252 |
+
},
|
253 |
+
"nbformat": 4,
|
254 |
+
"nbformat_minor": 2
|
255 |
+
}
|
TSLA_news.csv → news_experimenting1.ipynb
RENAMED
File without changes
|
tesla_articles.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|