In [14]:
from dotenv import load_dotenv
from datetime import datetime, timedelta
import requests
import os
import time
import pandas as pd 
from news_preprocessing import *

In [15]:
load_dotenv()

True

In [16]:
def fetch_news(api_key, ticker, start_date, end_date):
    base_url = os.environ.get("endpointnewsp")
    headers = {"Authorization": f"Bearer {api_key}"}
    all_news = []
    
    end_date = datetime.now() - timedelta(days=1)  # Yesterday's date
    start_date = end_date - timedelta(days=365 * 2)  # Two years back
    current_date = start_date

    while current_date <= end_date:
        batch_end_date = current_date + timedelta(days=50)
        if batch_end_date > end_date:
            batch_end_date = end_date

        params = {
            "ticker": ticker,
            "published_utc.gte": current_date.strftime('%Y-%m-%d'),
            "published_utc.lte": batch_end_date.strftime('%Y-%m-%d'),
            "limit": 50,
            "sort": "published_utc"
        }

        try:
            response = requests.get(base_url, headers=headers, params=params)
            if response.status_code == 200:
                data = response.json()
                articles = data.get('results', [])
                all_news.extend(articles)
                print(f"Fetched {len(articles)} articles from {current_date.strftime('%Y-%m-%d')} to {batch_end_date.strftime('%Y-%m-%d')}")
                current_date = batch_end_date + timedelta(days=1)
            elif response.status_code == 429:
                print("Rate limit reached. Waiting to retry...")
                time.sleep(60)  # Wait for 60 seconds or as recommended by the API
                continue  # Retry the current request
            else:
                print(f"Failed to fetch data: {response.status_code}, {response.text}")
                break
        except Exception as e:
            print(f"An error occurred: {e}")
            break

    return all_news

# Example usage
api_key = os.environ.get('newsp_api')
ticker = 'TSLA'
#start_date = datetime(2022, 4, 1)  # start date
#end_date = datetime(2024, 4, 1)
news_articles = fetch_news(api_key, ticker, start_date, end_date)
print(f"Total articles fetched: {len(news_articles)}")


Fetched 50 articles from 2022-05-03 to 2022-06-22
Fetched 50 articles from 2022-06-23 to 2022-08-12
Fetched 50 articles from 2022-08-13 to 2022-10-02
Fetched 50 articles from 2022-10-03 to 2022-11-22
Fetched 50 articles from 2022-11-23 to 2023-01-12
Rate limit reached. Waiting to retry...
Fetched 50 articles from 2023-01-13 to 2023-03-04
Fetched 50 articles from 2023-03-05 to 2023-04-24
Fetched 50 articles from 2023-04-25 to 2023-06-14
Fetched 50 articles from 2023-06-15 to 2023-08-04
Fetched 50 articles from 2023-08-05 to 2023-09-24
Rate limit reached. Waiting to retry...
Fetched 50 articles from 2023-09-25 to 2023-11-14
Fetched 50 articles from 2023-11-15 to 2024-01-04
Fetched 50 articles from 2024-01-05 to 2024-02-24
Fetched 50 articles from 2024-02-25 to 2024-04-15
Fetched 50 articles from 2024-04-16 to 2024-05-02
Total articles fetched: 750


In [17]:
# Process the news articles
df = process_news_articles(news_articles)

In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 720 entries, 1970-01-01 00:00:00 to 1970-01-01 00:00:00.000000719
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   date         720 non-null    object 
 1   id           720 non-null    object 
 2   publisher    720 non-null    object 
 3   title        720 non-null    object 
 4   author       720 non-null    object 
 5   article_url  720 non-null    object 
 6   tickers      720 non-null    object 
 7   amp_url      720 non-null    object 
 8   image_url    720 non-null    object 
 9   description  720 non-null    object 
 10  keywords     720 non-null    object 
 11  sentiment    720 non-null    float64
 12  time         720 non-null    object 
dtypes: float64(1), object(12)
memory usage: 78.8+ KB


In [19]:
df.head()

Unnamed: 0,date,id,publisher,title,author,article_url,tickers,amp_url,image_url,description,keywords,sentiment,time
1970-01-01 00:00:00.000000000,2022-06-22,nVZWipNVQFFoSi87tF0APXW_uRHl13wat_uR1c0iR6A,"{'name': 'Benzinga', 'homepage_url': 'https://...",Elon Musk Gives New Update On Tesla Cybertruck...,Chris Katje,https://www.benzinga.com/news/22/06/27820587/e...,"[F, TSLA, RIVN]",https://www.benzinga.com/amp/content/27820587,https://cdn.benzinga.com/files/images/story/20...,A recent interview between Tesla Inc (NASDAQ: ...,"[News, Interview]",0.04,22:40:56
1970-01-01 00:00:00.000000001,2022-06-22,x_9M1zYfZoHn_ptJM3USKxrodDtd4TcTsQSldXlViMw,"{'name': 'MarketWatch', 'homepage_url': 'https...",Tesla's new factories are 'gigantic money furn...,MarketWatch,https://www.marketwatch.com/story/teslas-new-f...,[TSLA],No URL provided,https://images.mktw.net/im-569600/social,Tesla Inc.’s two newest car factories have bee...,No keywords,0.0,21:59:00
1970-01-01 00:00:00.000000002,2022-06-22,SDepdS_qKyhE6vqbR6Fathnn81fYDapZCk3DKT1Xpv4,"{'name': 'The Motley Fool', 'homepage_url': 'h...","Why Tesla Shares Jumped Initially, Then Retrea...",newsfeedback@fool.com (Chris Neiger),https://www.fool.com/investing/2022/06/22/why-...,[TSLA],No URL provided,https://g.foolcdn.com/editorial/images/686400/...,Tesla's Shanghai plant may temporarily suspend...,[investing],0.0,19:33:04
1970-01-01 00:00:00.000000003,2022-06-22,xTSACaU2z-Mzqa23BiUeHTEUuY_jbVTWOYFkN3uszxg,"{'name': 'MarketWatch', 'homepage_url': 'https...",These are the 10 used-car models whose prices ...,MarketWatch,https://www.marketwatch.com/story/these-are-th...,"[LOTZ, TSLA]",https://www.marketwatch.com/amp/story/these-ar...,https://images.mktw.net/im-569120/social,"Used-car prices rose on average 17% in May, wi...",No keywords,0.225,17:32:00
1970-01-01 00:00:00.000000004,2022-06-22,pksGB2t-5ukDnjkRw_VYr9sfcSOExNmQiHzUq9_rBa0,"{'name': 'Zacks Investment Research', 'homepag...","Tesla (TSLA) to Cut 10% Salaried Jobs, Raise H...",Zacks Equity Research,https://www.zacks.com/stock/news/1942395/tesla...,"[SMP, TSLA, WNC, FOXF]",https://www.zacks.com/amp/stock/news/1942395/t...,https://staticx-tuner.zacks.com/images/article...,Tesla (TSLA) to lay off 10% of its salaried wo...,No keywords,0.0,15:58:00


In [20]:
df= df.sort_index(ascending=False)

In [21]:
df.to_csv('news_articles.csv', index=False)


In [22]:
df_processed = exponential_moving_average(df, window=7)

In [23]:
df_processed.to_csv('news_articles_ema.csv', index=False)

In [24]:
df_processed.head()

Unnamed: 0,date,id,publisher,title,author,article_url,tickers,amp_url,image_url,description,keywords,sentiment,time,exp_mean_7_days
1970-01-01 00:00:00.000000719,2024-04-29,MeWGIZiKn6J3JCwWAkHNqVv6Cc9HToUK-HmodQSesdM,"{'name': 'The Motley Fool', 'homepage_url': 'h...",Why Baidu Stock Jumped Today,newsfeedback@fool.com (James Brumley),https://www.fool.com/investing/2024/04/29/why-...,"[BIDU, GOOGL, TSLA, GOOG, IQ]",No URL provided,https://g.foolcdn.com/editorial/images/774939/...,It's getting difficult not to notice how impre...,[investing],0.25,21:26:09,0.25
1970-01-01 00:00:00.000000718,2024-04-29,T9MgJwXEmlRjWpkmLvcwnBggkbeXWWoGzFISY65WwBc,"{'name': 'Zacks Investment Research', 'homepag...","Markets Up on Tesla, Q1 Earnings; Q1 Beats Aft...",Mark Vickery,https://www.zacks.com/stock/news/2264549/marke...,"[AMZN, AMD, KO, LLY, SBUX, MCD, NXPI, TSLA, YU...",https://www.zacks.com/amp/stock/news/2264549/m...,https://staticx-tuner.zacks.com/images/article...,It's now the third straight day higher going b...,No keywords,0.111905,22:10:00,0.215476
1970-01-01 00:00:00.000000717,2024-04-30,xaUyg2qUKK7h_EDkKruXR9KdY_drlcXLai14uHvZTsc,"{'name': 'Seeking Alpha', 'homepage_url': 'htt...",Big Tech Earnings Beats Stymie Q2 2024 Sell-Off,Christine Short,https://seekingalpha.com/article/4687390-big-t...,"[AAPL, AMZN, GOOG, GOOGL, HSY, KO, META, MMM, ...",No URL provided,https://static.seekingalpha.com/cdn/s3/uploads...,Markets finally turned positive late last week...,No keywords,-0.032955,04:30:00,0.153369
1970-01-01 00:00:00.000000716,2024-04-30,IPVxhBMbT73GJJHLQZYPFb8yQpAxzbEuXrJk0dMSt8U,"{'name': 'Zacks Investment Research', 'homepag...","Stock Market News for Apr 30, 2024",Zacks Equity Research,https://www.zacks.com/stock/news/2264591/stock...,"[AAPL, TSLA, PARA]",https://www.zacks.com/amp/stock/news/2264591/s...,https://staticx-tuner.zacks.com/images/article...,Wall Street closed higher on Monday to start a...,No keywords,0.05,07:27:00,0.127526
1970-01-01 00:00:00.000000715,2024-04-30,6pQAGkGEZvAd76QYnk6aAhhUCVLrUkdOjgnon-ALmsQ,"{'name': 'Benzinga', 'homepage_url': 'https://...",'Tesla Has The Endorsement' Of Xi Jinping's Go...,Benzinga Neuro,https://www.benzinga.com/analyst-ratings/analy...,[TSLA],https://www.benzinga.com/amp/content/38511044,https://cdn.benzinga.com/files/images/story/20...,"Tim Higgins, author of “Power Play: Tesla, Elo...","[News, Analyst Color, Tech, General]",0.008333,07:42:58,0.097728


In [25]:
df_processed.tail()

Unnamed: 0,date,id,publisher,title,author,article_url,tickers,amp_url,image_url,description,keywords,sentiment,time,exp_mean_7_days
1970-01-01 00:00:00.000000004,2022-06-22,pksGB2t-5ukDnjkRw_VYr9sfcSOExNmQiHzUq9_rBa0,"{'name': 'Zacks Investment Research', 'homepag...","Tesla (TSLA) to Cut 10% Salaried Jobs, Raise H...",Zacks Equity Research,https://www.zacks.com/stock/news/1942395/tesla...,"[SMP, TSLA, WNC, FOXF]",https://www.zacks.com/amp/stock/news/1942395/t...,https://staticx-tuner.zacks.com/images/article...,Tesla (TSLA) to lay off 10% of its salaried wo...,No keywords,0.0,15:58:00,0.19501
1970-01-01 00:00:00.000000003,2022-06-22,xTSACaU2z-Mzqa23BiUeHTEUuY_jbVTWOYFkN3uszxg,"{'name': 'MarketWatch', 'homepage_url': 'https...",These are the 10 used-car models whose prices ...,MarketWatch,https://www.marketwatch.com/story/these-are-th...,"[LOTZ, TSLA]",https://www.marketwatch.com/amp/story/these-ar...,https://images.mktw.net/im-569120/social,"Used-car prices rose on average 17% in May, wi...",No keywords,0.225,17:32:00,0.202508
1970-01-01 00:00:00.000000002,2022-06-22,SDepdS_qKyhE6vqbR6Fathnn81fYDapZCk3DKT1Xpv4,"{'name': 'The Motley Fool', 'homepage_url': 'h...","Why Tesla Shares Jumped Initially, Then Retrea...",newsfeedback@fool.com (Chris Neiger),https://www.fool.com/investing/2022/06/22/why-...,[TSLA],No URL provided,https://g.foolcdn.com/editorial/images/686400/...,Tesla's Shanghai plant may temporarily suspend...,[investing],0.0,19:33:04,0.151881
1970-01-01 00:00:00.000000001,2022-06-22,x_9M1zYfZoHn_ptJM3USKxrodDtd4TcTsQSldXlViMw,"{'name': 'MarketWatch', 'homepage_url': 'https...",Tesla's new factories are 'gigantic money furn...,MarketWatch,https://www.marketwatch.com/story/teslas-new-f...,[TSLA],No URL provided,https://images.mktw.net/im-569600/social,Tesla Inc.’s two newest car factories have bee...,No keywords,0.0,21:59:00,0.11391
1970-01-01 00:00:00.000000000,2022-06-22,nVZWipNVQFFoSi87tF0APXW_uRHl13wat_uR1c0iR6A,"{'name': 'Benzinga', 'homepage_url': 'https://...",Elon Musk Gives New Update On Tesla Cybertruck...,Chris Katje,https://www.benzinga.com/news/22/06/27820587/e...,"[F, TSLA, RIVN]",https://www.benzinga.com/amp/content/27820587,https://cdn.benzinga.com/files/images/story/20...,A recent interview between Tesla Inc (NASDAQ: ...,"[News, Interview]",0.04,22:40:56,0.095433


In [27]:
print(df_processed['date'].min())
print(df_processed['date'].max())

2022-06-20
2024-05-02


In [28]:
print(df_processed['date'].max() - df_processed['date'].min()) 

682 days, 0:00:00


In [29]:
df_processed.shape

(720, 14)

In [30]:
duplicates = df_processed[df_processed.duplicated('date')]

In [31]:
duplicates.shape

(657, 14)