{ "cells": [ { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "import requests\n", "import pandas as pd\n", "from datetime import datetime\n", "from textblob import TextBlob" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [
"def fetch_tesla_news(api_key, start_date, end_date, max_pages=100):\n",
"    \"\"\"Download TSLA news articles from the Marketaux news API.\n",
"\n",
"    Pages are requested one at a time, starting at page 1, until a page\n",
"    comes back empty or `max_pages` requests have been made.\n",
"\n",
"    Args:\n",
"        api_key: Marketaux API token, sent as the `api_token` query parameter.\n",
"        start_date: Only include articles published after this date (e.g. \"2017-01-01\").\n",
"        end_date: Only include articles published before this date, same format.\n",
"        max_pages: Maximum number of pages (one request each) to download.\n",
"\n",
"    Returns:\n",
"        DataFrame with one row per article, flattened by `pd.json_normalize`;\n",
"        empty when the API returns no articles.\n",
"\n",
"    Raises:\n",
"        RuntimeError: If a request comes back with a status code other than 200.\n",
"    \"\"\"\n",
"    # Keep the query string out of the URL: every parameter, including the token,\n",
"    # is passed through `params` so nothing is duplicated or hard-coded.\n",
"    url = \"https://api.marketaux.com/v1/news/all\"\n",
"    articles = []\n",
"    for page in range(1, max_pages + 1):\n",
"        params = {\n",
"            \"api_token\": api_key,\n",
"            \"symbols\": \"TSLA\",\n",
"            \"filter_entities\": \"true\",\n",
"            \"language\": \"en\",\n",
"            \"published_after\": start_date,\n",
"            \"published_before\": end_date,\n",
"            \"page\": page,\n",
"        }\n",
"        # A timeout keeps a stalled connection from hanging the kernel forever.\n",
"        response = requests.get(url, params=params, timeout=30)\n",
"        if response.status_code != 200:\n",
"            raise RuntimeError(f\"Failed to fetch data: {response.text}\")\n",
"        page_articles = response.json().get(\"data\", [])\n",
"        if not page_articles:\n",
"            break  # an empty page means there are no further results\n",
"        articles.extend(page_articles)\n",
"    # Normalise once at the end instead of returning after the first page.\n",
"    return pd.json_normalize(articles)"
] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " uuid \\\n", "0 daf76e3e-caea-4c92-a461-6b3132655788 \n", "1 8dab10ca-5b23-465a-aa86-360bc987a774 \n", "2 b8c381b9-4187-433e-ad15-cecc9d227b13 \n", "\n", " title \\\n", "0 Stock market today: US futures climb as earnin... \n", "1 5 things to know before the stock market opens... \n", "2 Wall Street Breakfast Podcast: UNH: Personal D... \n", "\n", " description \\\n", "0 The wait for Tesla results is on as investors ... \n", "1 Here are the most important news items that in... \n", "2 UnitedHealth confirms personal data compromise... \n", "\n", " keywords \\\n", "0 \n", "1 Investment strategy, Economy, Markets, Busines... 
\n", "2 \n", "\n", " snippet \\\n", "0 US stocks climbed on Tuesday, on track for fur... \n", "1 In this article CPRI Follow your favorite stoc... \n", "2 JHVEPhoto/iStock Editorial via Getty Images\\n\\... \n", "\n", " url \\\n", "0 https://finance.yahoo.com/news/stock-market-to... \n", "1 https://www.cnbc.com/2024/04/23/5-things-to-kn... \n", "2 https://seekingalpha.com/article/4685243-wall-... \n", "\n", " image_url language \\\n", "0 https://s.yimg.com/ny/api/res/1.2/mqjC0VUO61dY... en \n", "1 https://image.cnbcfm.com/api/v1/image/10692170... en \n", "2 https://static.seekingalpha.com/cdn/s3/uploads... en \n", "\n", " published_at source relevance_score \\\n", "0 2024-04-23T11:22:53.000000Z finance.yahoo.com None \n", "1 2024-04-23T11:16:00.000000Z cnbc.com None \n", "2 2024-04-23T11:00:00.000000Z seekingalpha.com None \n", "\n", " entities \\\n", "0 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n", "1 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n", "2 [{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex... \n", "\n", " similar \n", "0 [{'uuid': '01cd65de-530d-407a-96c9-5b3359e98a0... 
\n", "1 [] \n", "2 [] \n" ] } ], "source": [
"import os\n",
"\n",
"# Read the Marketaux token from the environment so the secret is never saved in the notebook.\n",
"api_key = os.environ[\"MARKETAUX_API_TOKEN\"]\n",
"tesla_news_df = fetch_tesla_news(api_key, \"2017-01-01\", datetime.now().strftime('%Y-%m-%d'))\n",
"tesla_news_df.head()"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "uuid 0\n", "title 0\n", "description 0\n", "keywords 0\n", "snippet 0\n", "url 0\n", "image_url 0\n", "language 0\n", "published_at 0\n", "source 0\n", "relevance_score 3\n", "entities 0\n", "similar 0\n", "dtype: int64\n" ] } ], "source": [
"# Normalise the description text: lower-case it and strip punctuation.\n",
"# The pandas .str accessor is used because plain str.replace treats its first\n",
"# argument literally, so a regex pattern passed to it never matches punctuation.\n",
"# Missing descriptions pass through as missing values instead of raising.\n",
"tesla_news_df['description'] = (\n",
"    tesla_news_df['description']\n",
"    .str.lower()\n",
"    .str.replace(r'[^\\w\\s]', '', regex=True)\n",
")\n",
"\n",
"# Count missing values per column\n",
"tesla_news_df.isnull().sum()\n"
] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " title sentiment\n", "0 Wall Street Breakfast: What Moved Markets 0.197443\n", "1 1 \"Magnificent Seven\" Stock With 1,234% Upside... 1.000000\n", "2 Market Today: Tech Giants Reignite AI Craze, A... 
-0.024242\n" ] } ], "source": [
"# Score every description with TextBlob and keep the polarity component\n",
"tesla_news_df['sentiment'] = tesla_news_df['description'].map(\n",
"    lambda description: TextBlob(description).sentiment.polarity\n",
")\n",
"\n",
"print(tesla_news_df[['title', 'sentiment']])\n"
] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [
"# Parse the publication timestamps, derive each article's calendar date,\n",
"# then tally how many articles were published on each date\n",
"tesla_news_df['published_at'] = pd.to_datetime(tesla_news_df['published_at'])\n",
"tesla_news_df['date'] = tesla_news_df['published_at'].dt.date\n",
"daily_news_count = tesla_news_df.groupby(by='date').size()\n"
] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 3 entries, 0 to 2\n", "Data columns (total 15 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 uuid 3 non-null object \n", " 1 title 3 non-null object \n", " 2 description 3 non-null object \n", " 3 keywords 3 non-null object \n", " 4 snippet 3 non-null object \n", " 5 url 3 non-null object \n", " 6 image_url 3 non-null object \n", " 7 language 3 non-null object \n", " 8 published_at 3 non-null datetime64[ns, UTC]\n", " 9 source 3 non-null object \n", " 10 relevance_score 0 non-null object \n", " 11 entities 3 non-null object \n", " 12 similar 3 non-null object \n", " 13 sentiment 3 non-null float64 \n", " 14 date 3 non-null object \n", "dtypes: datetime64[ns, UTC](1), float64(1), object(13)\n", "memory usage: 492.0+ bytes\n" ] } ], "source": [ "tesla_news_df.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", "version": "3.11.4" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }