{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import pandas as pd\n",
    "from datetime import datetime\n",
    "from textblob import TextBlob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_tesla_news(api_key, start_date, end_date):\n",
    "    url = \"https://api.marketaux.com/v1/news/all?symbols=TSLA&filter_entities=true&language=en&api_token=iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\"  # Modify this based on the exact endpoint you need\n",
    "    headers = {\n",
    "        \"x-api-key\": api_key\n",
    "    }\n",
    "     # Since each page corresponds to a single request, limit the number of pages to 100\n",
    "    for page in range(1, 101):  # start from page 1 to page 100\n",
    "        params = {\n",
    "            \"tickers\": \"TSLA\",\n",
    "            \"filter_entities\": \"true\",\n",
    "            \"language\": \"en\",\n",
    "            \"from\": start_date,\n",
    "            \"to\": end_date,\n",
    "            \"page\": page\n",
    "        }\n",
    "    \n",
    "    response = requests.get(url, headers=headers, params=params)\n",
    "    if response.status_code == 200:\n",
    "        return pd.json_normalize(response.json()['data'])\n",
    "    else:\n",
    "        raise Exception(f\"Failed to fetch data: {response.text}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                   uuid  \\\n",
      "0  daf76e3e-caea-4c92-a461-6b3132655788   \n",
      "1  8dab10ca-5b23-465a-aa86-360bc987a774   \n",
      "2  b8c381b9-4187-433e-ad15-cecc9d227b13   \n",
      "\n",
      "                                               title  \\\n",
      "0  Stock market today: US futures climb as earnin...   \n",
      "1  5 things to know before the stock market opens...   \n",
      "2  Wall Street Breakfast Podcast: UNH: Personal D...   \n",
      "\n",
      "                                         description  \\\n",
      "0  The wait for Tesla results is on as investors ...   \n",
      "1  Here are the most important news items that in...   \n",
      "2  UnitedHealth confirms personal data compromise...   \n",
      "\n",
      "                                            keywords  \\\n",
      "0                                                      \n",
      "1  Investment strategy, Economy, Markets, Busines...   \n",
      "2                                                      \n",
      "\n",
      "                                             snippet  \\\n",
      "0  US stocks climbed on Tuesday, on track for fur...   \n",
      "1  In this article CPRI Follow your favorite stoc...   \n",
      "2  JHVEPhoto/iStock Editorial via Getty Images\\n\\...   \n",
      "\n",
      "                                                 url  \\\n",
      "0  https://finance.yahoo.com/news/stock-market-to...   \n",
      "1  https://www.cnbc.com/2024/04/23/5-things-to-kn...   \n",
      "2  https://seekingalpha.com/article/4685243-wall-...   \n",
      "\n",
      "                                           image_url language  \\\n",
      "0  https://s.yimg.com/ny/api/res/1.2/mqjC0VUO61dY...       en   \n",
      "1  https://image.cnbcfm.com/api/v1/image/10692170...       en   \n",
      "2  https://static.seekingalpha.com/cdn/s3/uploads...       en   \n",
      "\n",
      "                  published_at             source relevance_score  \\\n",
      "0  2024-04-23T11:22:53.000000Z  finance.yahoo.com            None   \n",
      "1  2024-04-23T11:16:00.000000Z           cnbc.com            None   \n",
      "2  2024-04-23T11:00:00.000000Z   seekingalpha.com            None   \n",
      "\n",
      "                                            entities  \\\n",
      "0  [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...   \n",
      "1  [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...   \n",
      "2  [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...   \n",
      "\n",
      "                                             similar  \n",
      "0  [{'uuid': '01cd65de-530d-407a-96c9-5b3359e98a0...  \n",
      "1                                                 []  \n",
      "2                                                 []  \n"
     ]
    }
   ],
   "source": [
    "api_key = \"iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs\"\n",
    "tesla_news_df = fetch_tesla_news(api_key, \"2017-01-01\", datetime.now().strftime('%Y-%m-%d'))\n",
    "print(tesla_news_df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "uuid               0\n",
      "title              0\n",
      "description        0\n",
      "keywords           0\n",
      "snippet            0\n",
      "url                0\n",
      "image_url          0\n",
      "language           0\n",
      "published_at       0\n",
      "source             0\n",
      "relevance_score    3\n",
      "entities           0\n",
      "similar            0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Clean text data, Check for any missing values or inconsistencies in the data\n",
    "tesla_news_df['description'] = tesla_news_df['description'].apply(lambda x: x.lower().replace('[^\\w\\s]', ''))\n",
    "\n",
    "# Check for any missing values\n",
    "print(tesla_news_df.isnull().sum())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                               title  sentiment\n",
      "0          Wall Street Breakfast: What Moved Markets   0.197443\n",
      "1  1 \"Magnificent Seven\" Stock With 1,234% Upside...   1.000000\n",
      "2  Market Today: Tech Giants Reignite AI Craze, A...  -0.024242\n"
     ]
    }
   ],
   "source": [
    "# Sentiment analysis on descriptions\n",
    "tesla_news_df['sentiment'] = tesla_news_df['description'].apply(lambda text: TextBlob(text).sentiment.polarity)\n",
    "\n",
    "print(tesla_news_df[['title', 'sentiment']])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example: Counting the number of articles per day\n",
    "tesla_news_df['published_at'] = pd.to_datetime(tesla_news_df['published_at'])  # Convert to datetime\n",
    "tesla_news_df['date'] = tesla_news_df['published_at'].dt.date\n",
    "daily_news_count = tesla_news_df.groupby('date').size()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 3 entries, 0 to 2\n",
      "Data columns (total 15 columns):\n",
      " #   Column           Non-Null Count  Dtype              \n",
      "---  ------           --------------  -----              \n",
      " 0   uuid             3 non-null      object             \n",
      " 1   title            3 non-null      object             \n",
      " 2   description      3 non-null      object             \n",
      " 3   keywords         3 non-null      object             \n",
      " 4   snippet          3 non-null      object             \n",
      " 5   url              3 non-null      object             \n",
      " 6   image_url        3 non-null      object             \n",
      " 7   language         3 non-null      object             \n",
      " 8   published_at     3 non-null      datetime64[ns, UTC]\n",
      " 9   source           3 non-null      object             \n",
      " 10  relevance_score  0 non-null      object             \n",
      " 11  entities         3 non-null      object             \n",
      " 12  similar          3 non-null      object             \n",
      " 13  sentiment        3 non-null      float64            \n",
      " 14  date             3 non-null      object             \n",
      "dtypes: datetime64[ns, UTC](1), float64(1), object(13)\n",
      "memory usage: 492.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "tesla_news_df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}