mtzeve committed
Commit cf1f1a2 · 1 Parent(s): b6eae30

check commit
feature_engineering.py CHANGED
@@ -21,7 +21,7 @@ def getNews(api_key,endpoint,ticker,from_date,to_date,num=1000):
     response = requests.get(endpoint, params=params)
 
     # Print the response from the API
-    #print(response.json())
+    print(response.json())
 
     #Return a Pandas dataframe from the response
     return pd.DataFrame(response.json())
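Note on the hunk above: uncommenting the print makes getNews dump the full JSON payload on every call. If the print is only meant for debugging, one option is to gate it behind a flag. The sketch below is not part of this commit, and the query-parameter names are hypothetical, since the start of the function (where params is built) lies outside the hunk:

    import pandas as pd
    import requests

    def getNews_debug(api_key, endpoint, ticker, from_date, to_date, num=1000, verbose=False):
        # Hypothetical variant of getNews: the parameter names below are stand-ins,
        # because the part of feature_engineering.py that builds `params` is outside this hunk.
        params = {"ticker": ticker, "from": from_date, "to": to_date, "limit": num, "api_key": api_key}

        response = requests.get(endpoint, params=params)

        # Print the raw payload only when explicitly requested, instead of on every call
        if verbose:
            print(response.json())

        # Return a Pandas dataframe from the response, as the original function does
        return pd.DataFrame(response.json())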
news_experimenting.ipynb ADDED
@@ -0,0 +1,255 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "import pandas as pd\n",
+ "from datetime import datetime\n",
+ "from textblob import TextBlob"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fetch_tesla_news(api_key, start_date, end_date):\n",
+ " url = \"https://api.marketaux.com/v1/news/all?symbols=TSLA&filter_entities=true&language=en&api_token=iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\" # Modify this based on the exact endpoint you need\n",
+ " headers = {\n",
+ " \"x-api-key\": api_key\n",
+ " }\n",
+ " # Since each page corresponds to a single request, limit the number of pages to 100\n",
+ " for page in range(1, 101): # start from page 1 to page 100\n",
+ " params = {\n",
+ " \"tickers\": \"TSLA\",\n",
+ " \"filter_entities\": \"true\",\n",
+ " \"language\": \"en\",\n",
+ " \"from\": start_date,\n",
+ " \"to\": end_date,\n",
+ " \"page\": page\n",
+ " }\n",
+ " \n",
+ " response = requests.get(url, headers=headers, params=params)\n",
+ " if response.status_code == 200:\n",
+ " return pd.json_normalize(response.json()['data'])\n",
+ " else:\n",
+ " raise Exception(f\"Failed to fetch data: {response.text}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " uuid \\\n",
+ "0 daf76e3e-caea-4c92-a461-6b3132655788 \n",
+ "1 8dab10ca-5b23-465a-aa86-360bc987a774 \n",
+ "2 b8c381b9-4187-433e-ad15-cecc9d227b13 \n",
+ "\n",
+ " title \\\n",
+ "0 Stock market today: US futures climb as earnin... \n",
+ "1 5 things to know before the stock market opens... \n",
+ "2 Wall Street Breakfast Podcast: UNH: Personal D... \n",
+ "\n",
+ " description \\\n",
+ "0 The wait for Tesla results is on as investors ... \n",
+ "1 Here are the most important news items that in... \n",
+ "2 UnitedHealth confirms personal data compromise... \n",
+ "\n",
+ " keywords \\\n",
+ "0 \n",
+ "1 Investment strategy, Economy, Markets, Busines... \n",
+ "2 \n",
+ "\n",
+ " snippet \\\n",
+ "0 US stocks climbed on Tuesday, on track for fur... \n",
+ "1 In this article CPRI Follow your favorite stoc... \n",
+ "2 JHVEPhoto/iStock Editorial via Getty Images\\n\\... \n",
+ "\n",
+ " url \\\n",
+ "0 https://finance.yahoo.com/news/stock-market-to... \n",
+ "1 https://www.cnbc.com/2024/04/23/5-things-to-kn... \n",
+ "2 https://seekingalpha.com/article/4685243-wall-... \n",
+ "\n",
+ " image_url language \\\n",
+ "0 https://s.yimg.com/ny/api/res/1.2/mqjC0VUO61dY... en \n",
+ "1 https://image.cnbcfm.com/api/v1/image/10692170... en \n",
+ "2 https://static.seekingalpha.com/cdn/s3/uploads... en \n",
+ "\n",
+ " published_at source relevance_score \\\n",
+ "0 2024-04-23T11:22:53.000000Z finance.yahoo.com None \n",
+ "1 2024-04-23T11:16:00.000000Z cnbc.com None \n",
+ "2 2024-04-23T11:00:00.000000Z seekingalpha.com None \n",
+ "\n",
+ " entities \\\n",
+ "0 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
+ "1 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
+ "2 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n",
+ "\n",
+ " similar \n",
+ "0 [{'uuid': '01cd65de-530d-407a-96c9-5b3359e98a0... \n",
+ "1 [] \n",
+ "2 [] \n"
+ ]
+ }
+ ],
+ "source": [
+ "api_key = \"iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\"\n",
+ "tesla_news_df = fetch_tesla_news(api_key, \"2017-01-01\", datetime.now().strftime('%Y-%m-%d'))\n",
+ "print(tesla_news_df.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "uuid 0\n",
+ "title 0\n",
+ "description 0\n",
+ "keywords 0\n",
+ "snippet 0\n",
+ "url 0\n",
+ "image_url 0\n",
+ "language 0\n",
+ "published_at 0\n",
+ "source 0\n",
+ "relevance_score 3\n",
+ "entities 0\n",
+ "similar 0\n",
+ "dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Clean text data, Check for any missing values or inconsistencies in the data\n",
+ "tesla_news_df['description'] = tesla_news_df['description'].apply(lambda x: x.lower().replace('[^\\w\\s]', ''))\n",
+ "\n",
+ "# Check for any missing values\n",
+ "print(tesla_news_df.isnull().sum())\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " title sentiment\n",
+ "0 Wall Street Breakfast: What Moved Markets 0.197443\n",
+ "1 1 \"Magnificent Seven\" Stock With 1,234% Upside... 1.000000\n",
+ "2 Market Today: Tech Giants Reignite AI Craze, A... -0.024242\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Sentiment analysis on descriptions\n",
+ "tesla_news_df['sentiment'] = tesla_news_df['description'].apply(lambda text: TextBlob(text).sentiment.polarity)\n",
+ "\n",
+ "print(tesla_news_df[['title', 'sentiment']])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Example: Counting the number of articles per day\n",
+ "tesla_news_df['published_at'] = pd.to_datetime(tesla_news_df['published_at']) # Convert to datetime\n",
+ "tesla_news_df['date'] = tesla_news_df['published_at'].dt.date\n",
+ "daily_news_count = tesla_news_df.groupby('date').size()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<class 'pandas.core.frame.DataFrame'>\n",
+ "RangeIndex: 3 entries, 0 to 2\n",
+ "Data columns (total 15 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 uuid 3 non-null object \n",
+ " 1 title 3 non-null object \n",
+ " 2 description 3 non-null object \n",
+ " 3 keywords 3 non-null object \n",
+ " 4 snippet 3 non-null object \n",
+ " 5 url 3 non-null object \n",
+ " 6 image_url 3 non-null object \n",
+ " 7 language 3 non-null object \n",
+ " 8 published_at 3 non-null datetime64[ns, UTC]\n",
+ " 9 source 3 non-null object \n",
+ " 10 relevance_score 0 non-null object \n",
+ " 11 entities 3 non-null object \n",
+ " 12 similar 3 non-null object \n",
+ " 13 sentiment 3 non-null float64 \n",
+ " 14 date 3 non-null object \n",
+ "dtypes: datetime64[ns, UTC](1), float64(1), object(13)\n",
+ "memory usage: 492.0+ bytes\n"
+ ]
+ }
+ ],
+ "source": [
+ "tesla_news_df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
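Note on the fetch_tesla_news cell in the notebook above: the function returns inside the page loop, so despite the range(1, 101) only the first page is ever fetched, and the api_token hard-coded in the URL duplicates the x-api-key header. The sketch below is a suggested variant, not what the commit contains; it reuses the same Marketaux endpoint and the query parameters already present in the notebook, but accumulates pages into one DataFrame:

    import pandas as pd
    import requests

    def fetch_tesla_news_all_pages(api_key, start_date, end_date, max_pages=100):
        # Same endpoint as the notebook cell; the token is passed as a query
        # parameter instead of being hard-coded into the URL string.
        url = "https://api.marketaux.com/v1/news/all"
        frames = []
        for page in range(1, max_pages + 1):
            params = {
                "api_token": api_key,
                "symbols": "TSLA",
                "filter_entities": "true",
                "language": "en",
                "from": start_date,
                "to": end_date,
                "page": page,
            }
            response = requests.get(url, params=params)
            if response.status_code != 200:
                raise Exception(f"Failed to fetch data: {response.text}")
            data = response.json().get("data", [])
            if not data:  # stop once the API returns an empty page
                break
            frames.append(pd.json_normalize(data))
        # Concatenate all pages; return an empty DataFrame if nothing came back
        return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()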
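A second note on the notebook: the cleaning cell calls x.lower().replace('[^\w\s]', '') on each description, but str.replace treats the pattern as a literal string, so no punctuation is actually removed. A regex-based sketch of the presumable intent (lowercase, then strip non-word characters) would be:

    import re

    def clean_description(text: str) -> str:
        # Lowercase, then drop every character that is not a word character or whitespace
        return re.sub(r"[^\w\s]", "", text.lower())

    # Equivalent vectorised form on the notebook's DataFrame column:
    # tesla_news_df['description'] = tesla_news_df['description'].str.lower().str.replace(r'[^\w\s]', '', regex=True)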
TSLA_news.csv → news_experimenting1.ipynb RENAMED
File without changes
tesla_articles.json ADDED
The diff for this file is too large to render. See raw diff