{ "cells": [ { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "from dotenv import load_dotenv\n", "from datetime import datetime, timedelta\n", "import requests\n", "import os\n", "import time\n", "import pandas as pd \n", "from news_preprocessing import *" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "load_dotenv()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fetched 50 articles from 2022-05-03 to 2022-06-22\n", "Fetched 50 articles from 2022-06-23 to 2022-08-12\n", "Fetched 50 articles from 2022-08-13 to 2022-10-02\n", "Fetched 50 articles from 2022-10-03 to 2022-11-22\n", "Fetched 50 articles from 2022-11-23 to 2023-01-12\n", "Rate limit reached. Waiting to retry...\n", "Fetched 50 articles from 2023-01-13 to 2023-03-04\n", "Fetched 50 articles from 2023-03-05 to 2023-04-24\n", "Fetched 50 articles from 2023-04-25 to 2023-06-14\n", "Fetched 50 articles from 2023-06-15 to 2023-08-04\n", "Fetched 50 articles from 2023-08-05 to 2023-09-24\n", "Rate limit reached. 
def fetch_news(api_key, ticker, start_date=None, end_date=None, *,
               window_days=50, limit=50, max_retries=5, retry_wait=60, timeout=30):
    """Collect news articles for ``ticker`` by walking a date range window by window.

    One GET request is issued per window against the URL read from the
    ``endpointnewsp`` environment variable, with ``api_key`` sent as a bearer
    token. The ``results`` list of every successful response is appended to
    the returned list.

    Parameters
    ----------
    api_key : str
        Token placed in the ``Authorization: Bearer ...`` header.
    ticker : str
        Value sent as the ``ticker`` query parameter.
    start_date, end_date : datetime, optional
        Inclusive bounds of the range to walk. ``end_date`` defaults to
        yesterday and ``start_date`` to 730 days before ``end_date``.
        Explicitly supplied values are honoured (they used to be overwritten).
    window_days : int, keyword-only
        Number of days added to a window's start to obtain its end.
    limit : int, keyword-only
        Value sent as the ``limit`` query parameter.
    max_retries : int, keyword-only
        How many times a window is retried after an HTTP 429 before giving up.
    retry_wait : float, keyword-only
        Seconds to sleep after an HTTP 429 before retrying.
    timeout : float, keyword-only
        Timeout in seconds handed to ``requests.get``.

    Returns
    -------
    list
        All collected ``results`` entries. If a request fails, the rate limit
        does not clear, or an exception occurs, the loop stops and whatever
        was collected so far is returned.

    Raises
    ------
    ValueError
        If ``window_days`` is negative (the loop could never advance).
    """
    if window_days < 0:
        raise ValueError("window_days must be non-negative")

    base_url = os.environ.get("endpointnewsp")
    headers = {"Authorization": f"Bearer {api_key}"}
    all_news = []

    # Only fill in the bounds the caller did not provide.
    if end_date is None:
        end_date = datetime.now() - timedelta(days=1)  # Yesterday's date
    if start_date is None:
        start_date = end_date - timedelta(days=365 * 2)  # Two years back

    current_date = start_date
    retries = 0  # consecutive 429 responses for the current window

    while current_date <= end_date:
        # Clamp the last window so it never runs past end_date.
        batch_end_date = min(current_date + timedelta(days=window_days), end_date)

        # NOTE(review): only one page of at most `limit` articles is requested
        # per window. A window that comes back with exactly `limit` articles
        # was presumably truncated - confirm against the API documentation
        # whether pagination is available, or shrink window_days.
        params = {
            "ticker": ticker,
            "published_utc.gte": current_date.strftime('%Y-%m-%d'),
            "published_utc.lte": batch_end_date.strftime('%Y-%m-%d'),
            "limit": limit,
            "sort": "published_utc"
        }

        try:
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
            if response.status_code == 200:
                data = response.json()
                # Treat a missing or null "results" field as an empty page.
                articles = data.get('results') or []
                all_news.extend(articles)
                print(f"Fetched {len(articles)} articles from {current_date.strftime('%Y-%m-%d')} to {batch_end_date.strftime('%Y-%m-%d')}")
                current_date = batch_end_date + timedelta(days=1)
                retries = 0
            elif response.status_code == 429:
                if retries >= max_retries:
                    print(f"Rate limit still in effect after {max_retries} retries. Giving up.")
                    break
                retries += 1
                print("Rate limit reached. Waiting to retry...")
                time.sleep(retry_wait)
                continue  # Retry the same window
            else:
                print(f"Failed to fetch data: {response.status_code}, {response.text}")
                break
        except Exception as e:
            # Best effort: report the problem and return what was collected.
            print(f"An error occurred: {e}")
            break

    return all_news

# Example usage: no explicit dates are passed, so the function falls back to
# the two years ending yesterday. (The call used to pass module-level
# start_date/end_date names that are not defined in a fresh kernel.)
api_key = os.environ.get('newsp_api')
ticker = 'TSLA'
news_articles = fetch_news(api_key, ticker)
print(f"Total articles fetched: {len(news_articles)}")
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateidpublishertitleauthorarticle_urltickersamp_urlimage_urldescriptionkeywordssentimenttime
1970-01-01 00:00:00.0000000002022-06-22nVZWipNVQFFoSi87tF0APXW_uRHl13wat_uR1c0iR6A{'name': 'Benzinga', 'homepage_url': 'https://...Elon Musk Gives New Update On Tesla Cybertruck...Chris Katjehttps://www.benzinga.com/news/22/06/27820587/e...[F, TSLA, RIVN]https://www.benzinga.com/amp/content/27820587https://cdn.benzinga.com/files/images/story/20...A recent interview between Tesla Inc (NASDAQ: ...[News, Interview]0.04022:40:56
1970-01-01 00:00:00.0000000012022-06-22x_9M1zYfZoHn_ptJM3USKxrodDtd4TcTsQSldXlViMw{'name': 'MarketWatch', 'homepage_url': 'https...Tesla's new factories are 'gigantic money furn...MarketWatchhttps://www.marketwatch.com/story/teslas-new-f...[TSLA]No URL providedhttps://images.mktw.net/im-569600/socialTesla Inc.’s two newest car factories have bee...No keywords0.00021:59:00
1970-01-01 00:00:00.0000000022022-06-22SDepdS_qKyhE6vqbR6Fathnn81fYDapZCk3DKT1Xpv4{'name': 'The Motley Fool', 'homepage_url': 'h...Why Tesla Shares Jumped Initially, Then Retrea...newsfeedback@fool.com (Chris Neiger)https://www.fool.com/investing/2022/06/22/why-...[TSLA]No URL providedhttps://g.foolcdn.com/editorial/images/686400/...Tesla's Shanghai plant may temporarily suspend...[investing]0.00019:33:04
1970-01-01 00:00:00.0000000032022-06-22xTSACaU2z-Mzqa23BiUeHTEUuY_jbVTWOYFkN3uszxg{'name': 'MarketWatch', 'homepage_url': 'https...These are the 10 used-car models whose prices ...MarketWatchhttps://www.marketwatch.com/story/these-are-th...[LOTZ, TSLA]https://www.marketwatch.com/amp/story/these-ar...https://images.mktw.net/im-569120/socialUsed-car prices rose on average 17% in May, wi...No keywords0.22517:32:00
1970-01-01 00:00:00.0000000042022-06-22pksGB2t-5ukDnjkRw_VYr9sfcSOExNmQiHzUq9_rBa0{'name': 'Zacks Investment Research', 'homepag...Tesla (TSLA) to Cut 10% Salaried Jobs, Raise H...Zacks Equity Researchhttps://www.zacks.com/stock/news/1942395/tesla...[SMP, TSLA, WNC, FOXF]https://www.zacks.com/amp/stock/news/1942395/t...https://staticx-tuner.zacks.com/images/article...Tesla (TSLA) to lay off 10% of its salaried wo...No keywords0.00015:58:00
\n", "
" ], "text/plain": [ " date \\\n", "1970-01-01 00:00:00.000000000 2022-06-22 \n", "1970-01-01 00:00:00.000000001 2022-06-22 \n", "1970-01-01 00:00:00.000000002 2022-06-22 \n", "1970-01-01 00:00:00.000000003 2022-06-22 \n", "1970-01-01 00:00:00.000000004 2022-06-22 \n", "\n", " id \\\n", "1970-01-01 00:00:00.000000000 nVZWipNVQFFoSi87tF0APXW_uRHl13wat_uR1c0iR6A \n", "1970-01-01 00:00:00.000000001 x_9M1zYfZoHn_ptJM3USKxrodDtd4TcTsQSldXlViMw \n", "1970-01-01 00:00:00.000000002 SDepdS_qKyhE6vqbR6Fathnn81fYDapZCk3DKT1Xpv4 \n", "1970-01-01 00:00:00.000000003 xTSACaU2z-Mzqa23BiUeHTEUuY_jbVTWOYFkN3uszxg \n", "1970-01-01 00:00:00.000000004 pksGB2t-5ukDnjkRw_VYr9sfcSOExNmQiHzUq9_rBa0 \n", "\n", " publisher \\\n", "1970-01-01 00:00:00.000000000 {'name': 'Benzinga', 'homepage_url': 'https://... \n", "1970-01-01 00:00:00.000000001 {'name': 'MarketWatch', 'homepage_url': 'https... \n", "1970-01-01 00:00:00.000000002 {'name': 'The Motley Fool', 'homepage_url': 'h... \n", "1970-01-01 00:00:00.000000003 {'name': 'MarketWatch', 'homepage_url': 'https... \n", "1970-01-01 00:00:00.000000004 {'name': 'Zacks Investment Research', 'homepag... \n", "\n", " title \\\n", "1970-01-01 00:00:00.000000000 Elon Musk Gives New Update On Tesla Cybertruck... \n", "1970-01-01 00:00:00.000000001 Tesla's new factories are 'gigantic money furn... \n", "1970-01-01 00:00:00.000000002 Why Tesla Shares Jumped Initially, Then Retrea... \n", "1970-01-01 00:00:00.000000003 These are the 10 used-car models whose prices ... \n", "1970-01-01 00:00:00.000000004 Tesla (TSLA) to Cut 10% Salaried Jobs, Raise H... \n", "\n", " author \\\n", "1970-01-01 00:00:00.000000000 Chris Katje \n", "1970-01-01 00:00:00.000000001 MarketWatch \n", "1970-01-01 00:00:00.000000002 newsfeedback@fool.com (Chris Neiger) \n", "1970-01-01 00:00:00.000000003 MarketWatch \n", "1970-01-01 00:00:00.000000004 Zacks Equity Research \n", "\n", " article_url \\\n", "1970-01-01 00:00:00.000000000 https://www.benzinga.com/news/22/06/27820587/e... 
\n", "1970-01-01 00:00:00.000000001 https://www.marketwatch.com/story/teslas-new-f... \n", "1970-01-01 00:00:00.000000002 https://www.fool.com/investing/2022/06/22/why-... \n", "1970-01-01 00:00:00.000000003 https://www.marketwatch.com/story/these-are-th... \n", "1970-01-01 00:00:00.000000004 https://www.zacks.com/stock/news/1942395/tesla... \n", "\n", " tickers \\\n", "1970-01-01 00:00:00.000000000 [F, TSLA, RIVN] \n", "1970-01-01 00:00:00.000000001 [TSLA] \n", "1970-01-01 00:00:00.000000002 [TSLA] \n", "1970-01-01 00:00:00.000000003 [LOTZ, TSLA] \n", "1970-01-01 00:00:00.000000004 [SMP, TSLA, WNC, FOXF] \n", "\n", " amp_url \\\n", "1970-01-01 00:00:00.000000000 https://www.benzinga.com/amp/content/27820587 \n", "1970-01-01 00:00:00.000000001 No URL provided \n", "1970-01-01 00:00:00.000000002 No URL provided \n", "1970-01-01 00:00:00.000000003 https://www.marketwatch.com/amp/story/these-ar... \n", "1970-01-01 00:00:00.000000004 https://www.zacks.com/amp/stock/news/1942395/t... \n", "\n", " image_url \\\n", "1970-01-01 00:00:00.000000000 https://cdn.benzinga.com/files/images/story/20... \n", "1970-01-01 00:00:00.000000001 https://images.mktw.net/im-569600/social \n", "1970-01-01 00:00:00.000000002 https://g.foolcdn.com/editorial/images/686400/... \n", "1970-01-01 00:00:00.000000003 https://images.mktw.net/im-569120/social \n", "1970-01-01 00:00:00.000000004 https://staticx-tuner.zacks.com/images/article... \n", "\n", " description \\\n", "1970-01-01 00:00:00.000000000 A recent interview between Tesla Inc (NASDAQ: ... \n", "1970-01-01 00:00:00.000000001 Tesla Inc.’s two newest car factories have bee... \n", "1970-01-01 00:00:00.000000002 Tesla's Shanghai plant may temporarily suspend... \n", "1970-01-01 00:00:00.000000003 Used-car prices rose on average 17% in May, wi... \n", "1970-01-01 00:00:00.000000004 Tesla (TSLA) to lay off 10% of its salaried wo... 
# Preview the first rows of the processed articles.
df.head()

# Put the articles in chronological order before exporting and smoothing.
# The frame's index is a per-row counter (rendered as nanoseconds after
# 1970-01-01), not the publication time, and within each fetched batch it runs
# from newer to older articles. Sorting on it therefore left the rows in
# neither ascending nor descending time order; sort on the actual publication
# date and time instead.
df = df.sort_values(['date', 'time'])

# The synthetic index carries no information, so it is not written out.
df.to_csv('news_articles.csv', index=False)

# NOTE(review): the recorded exp_mean_7_days values are consistent with a
# row-by-row recursion (0.25 * sentiment + 0.75 * previous row), so the result
# depends on row order - confirm against
# news_preprocessing.exponential_moving_average.
df_processed = exponential_moving_average(df, window=7)

df_processed.to_csv('news_articles_ema.csv', index=False)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateidpublishertitleauthorarticle_urltickersamp_urlimage_urldescriptionkeywordssentimenttimeexp_mean_7_days
1970-01-01 00:00:00.0000007192024-04-29MeWGIZiKn6J3JCwWAkHNqVv6Cc9HToUK-HmodQSesdM{'name': 'The Motley Fool', 'homepage_url': 'h...Why Baidu Stock Jumped Todaynewsfeedback@fool.com (James Brumley)https://www.fool.com/investing/2024/04/29/why-...[BIDU, GOOGL, TSLA, GOOG, IQ]No URL providedhttps://g.foolcdn.com/editorial/images/774939/...It's getting difficult not to notice how impre...[investing]0.25000021:26:090.250000
1970-01-01 00:00:00.0000007182024-04-29T9MgJwXEmlRjWpkmLvcwnBggkbeXWWoGzFISY65WwBc{'name': 'Zacks Investment Research', 'homepag...Markets Up on Tesla, Q1 Earnings; Q1 Beats Aft...Mark Vickeryhttps://www.zacks.com/stock/news/2264549/marke...[AMZN, AMD, KO, LLY, SBUX, MCD, NXPI, TSLA, YU...https://www.zacks.com/amp/stock/news/2264549/m...https://staticx-tuner.zacks.com/images/article...It's now the third straight day higher going b...No keywords0.11190522:10:000.215476
1970-01-01 00:00:00.0000007172024-04-30xaUyg2qUKK7h_EDkKruXR9KdY_drlcXLai14uHvZTsc{'name': 'Seeking Alpha', 'homepage_url': 'htt...Big Tech Earnings Beats Stymie Q2 2024 Sell-OffChristine Shorthttps://seekingalpha.com/article/4687390-big-t...[AAPL, AMZN, GOOG, GOOGL, HSY, KO, META, MMM, ...No URL providedhttps://static.seekingalpha.com/cdn/s3/uploads...Markets finally turned positive late last week...No keywords-0.03295504:30:000.153369
1970-01-01 00:00:00.0000007162024-04-30IPVxhBMbT73GJJHLQZYPFb8yQpAxzbEuXrJk0dMSt8U{'name': 'Zacks Investment Research', 'homepag...Stock Market News for Apr 30, 2024Zacks Equity Researchhttps://www.zacks.com/stock/news/2264591/stock...[AAPL, TSLA, PARA]https://www.zacks.com/amp/stock/news/2264591/s...https://staticx-tuner.zacks.com/images/article...Wall Street closed higher on Monday to start a...No keywords0.05000007:27:000.127526
1970-01-01 00:00:00.0000007152024-04-306pQAGkGEZvAd76QYnk6aAhhUCVLrUkdOjgnon-ALmsQ{'name': 'Benzinga', 'homepage_url': 'https://...'Tesla Has The Endorsement' Of Xi Jinping's Go...Benzinga Neurohttps://www.benzinga.com/analyst-ratings/analy...[TSLA]https://www.benzinga.com/amp/content/38511044https://cdn.benzinga.com/files/images/story/20...Tim Higgins, author of “Power Play: Tesla, Elo...[News, Analyst Color, Tech, General]0.00833307:42:580.097728
\n", "
" ], "text/plain": [ " date \\\n", "1970-01-01 00:00:00.000000719 2024-04-29 \n", "1970-01-01 00:00:00.000000718 2024-04-29 \n", "1970-01-01 00:00:00.000000717 2024-04-30 \n", "1970-01-01 00:00:00.000000716 2024-04-30 \n", "1970-01-01 00:00:00.000000715 2024-04-30 \n", "\n", " id \\\n", "1970-01-01 00:00:00.000000719 MeWGIZiKn6J3JCwWAkHNqVv6Cc9HToUK-HmodQSesdM \n", "1970-01-01 00:00:00.000000718 T9MgJwXEmlRjWpkmLvcwnBggkbeXWWoGzFISY65WwBc \n", "1970-01-01 00:00:00.000000717 xaUyg2qUKK7h_EDkKruXR9KdY_drlcXLai14uHvZTsc \n", "1970-01-01 00:00:00.000000716 IPVxhBMbT73GJJHLQZYPFb8yQpAxzbEuXrJk0dMSt8U \n", "1970-01-01 00:00:00.000000715 6pQAGkGEZvAd76QYnk6aAhhUCVLrUkdOjgnon-ALmsQ \n", "\n", " publisher \\\n", "1970-01-01 00:00:00.000000719 {'name': 'The Motley Fool', 'homepage_url': 'h... \n", "1970-01-01 00:00:00.000000718 {'name': 'Zacks Investment Research', 'homepag... \n", "1970-01-01 00:00:00.000000717 {'name': 'Seeking Alpha', 'homepage_url': 'htt... \n", "1970-01-01 00:00:00.000000716 {'name': 'Zacks Investment Research', 'homepag... \n", "1970-01-01 00:00:00.000000715 {'name': 'Benzinga', 'homepage_url': 'https://... \n", "\n", " title \\\n", "1970-01-01 00:00:00.000000719 Why Baidu Stock Jumped Today \n", "1970-01-01 00:00:00.000000718 Markets Up on Tesla, Q1 Earnings; Q1 Beats Aft... \n", "1970-01-01 00:00:00.000000717 Big Tech Earnings Beats Stymie Q2 2024 Sell-Off \n", "1970-01-01 00:00:00.000000716 Stock Market News for Apr 30, 2024 \n", "1970-01-01 00:00:00.000000715 'Tesla Has The Endorsement' Of Xi Jinping's Go... \n", "\n", " author \\\n", "1970-01-01 00:00:00.000000719 newsfeedback@fool.com (James Brumley) \n", "1970-01-01 00:00:00.000000718 Mark Vickery \n", "1970-01-01 00:00:00.000000717 Christine Short \n", "1970-01-01 00:00:00.000000716 Zacks Equity Research \n", "1970-01-01 00:00:00.000000715 Benzinga Neuro \n", "\n", " article_url \\\n", "1970-01-01 00:00:00.000000719 https://www.fool.com/investing/2024/04/29/why-... 
\n", "1970-01-01 00:00:00.000000718 https://www.zacks.com/stock/news/2264549/marke... \n", "1970-01-01 00:00:00.000000717 https://seekingalpha.com/article/4687390-big-t... \n", "1970-01-01 00:00:00.000000716 https://www.zacks.com/stock/news/2264591/stock... \n", "1970-01-01 00:00:00.000000715 https://www.benzinga.com/analyst-ratings/analy... \n", "\n", " tickers \\\n", "1970-01-01 00:00:00.000000719 [BIDU, GOOGL, TSLA, GOOG, IQ] \n", "1970-01-01 00:00:00.000000718 [AMZN, AMD, KO, LLY, SBUX, MCD, NXPI, TSLA, YU... \n", "1970-01-01 00:00:00.000000717 [AAPL, AMZN, GOOG, GOOGL, HSY, KO, META, MMM, ... \n", "1970-01-01 00:00:00.000000716 [AAPL, TSLA, PARA] \n", "1970-01-01 00:00:00.000000715 [TSLA] \n", "\n", " amp_url \\\n", "1970-01-01 00:00:00.000000719 No URL provided \n", "1970-01-01 00:00:00.000000718 https://www.zacks.com/amp/stock/news/2264549/m... \n", "1970-01-01 00:00:00.000000717 No URL provided \n", "1970-01-01 00:00:00.000000716 https://www.zacks.com/amp/stock/news/2264591/s... \n", "1970-01-01 00:00:00.000000715 https://www.benzinga.com/amp/content/38511044 \n", "\n", " image_url \\\n", "1970-01-01 00:00:00.000000719 https://g.foolcdn.com/editorial/images/774939/... \n", "1970-01-01 00:00:00.000000718 https://staticx-tuner.zacks.com/images/article... \n", "1970-01-01 00:00:00.000000717 https://static.seekingalpha.com/cdn/s3/uploads... \n", "1970-01-01 00:00:00.000000716 https://staticx-tuner.zacks.com/images/article... \n", "1970-01-01 00:00:00.000000715 https://cdn.benzinga.com/files/images/story/20... \n", "\n", " description \\\n", "1970-01-01 00:00:00.000000719 It's getting difficult not to notice how impre... \n", "1970-01-01 00:00:00.000000718 It's now the third straight day higher going b... \n", "1970-01-01 00:00:00.000000717 Markets finally turned positive late last week... \n", "1970-01-01 00:00:00.000000716 Wall Street closed higher on Monday to start a... \n", "1970-01-01 00:00:00.000000715 Tim Higgins, author of “Power Play: Tesla, Elo... 
# First five rows of the smoothed frame.
df_processed.head()

# Last five rows of the smoothed frame.
df_processed.tail()

# Earliest and latest article dates, one per line.
for bound in (df_processed['date'].min(), df_processed['date'].max()):
    print(bound)

# Length of the period covered by the articles.
date_col = df_processed['date']
print(date_col.max() - date_col.min())

# (rows, columns) of the smoothed frame.
df_processed.shape

# Rows whose date already appeared in an earlier row, i.e. every article
# after the first one on a given day.
repeated_date_mask = df_processed.duplicated(subset='date')
duplicates = df_processed.loc[repeated_date_mask]

duplicates.shape
"display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }