In [23]:
import requests
from datetime import datetime, timedelta
from dotenv import load_dotenv
import os

load_dotenv()

def fetch_articles(api_key, start_date, end_date):
    url = "https://api.marketaux.com/v1/news/all?symbols=TSLA&filter_entities=true&language=en&api_token=iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs"
    headers = {"x-api-key": api_key}
    params = {
        "tickers": "TSLA",
        "filter_entities": "true",
        "language": "en",
        "from": start_date.strftime('%Y-%m-%d'),
        "to": end_date.strftime('%Y-%m-%d'),
        "page": 1
    }
    articles = []

    while True:
        response = requests.get(url, headers=headers, params=params)
        if response.status_code == 200:
            data = response.json()['data']
            articles.extend(data)
            if len(data) < 3:  # Assuming 3 articles per page is the limit
                break
            params['page'] += 1  # Increment to fetch the next page
        else:
            print(f"Failed to fetch data: {response.text}")
            break
        if params['page'] > 100:  # Prevent exceeding the daily request limit
            break

    return articles


In [2]:
def fetch_data_over_time(api_key, start_year):
    current_date = datetime.now()
    start_date = datetime(start_year, 1, 1)

    all_articles = []

    while start_date < current_date:
        end_date = start_date + timedelta(days=30)  # Fetch month by month
        if end_date > current_date:
            end_date = current_date

        print(f"Fetching from {start_date.date()} to {end_date.date()}")
        articles = fetch_articles(api_key, start_date, end_date)
        all_articles.extend(articles)

        start_date += timedelta(days=31)  # Move to the next month

    return all_articles


In [24]:
api_key = os.environ.get('news_api')
all_articles = fetch_data_over_time(api_key, 2017)
print(f"Total articles fetched: {len(all_articles)}")


Fetching from 2017-01-01 to 2017-01-31


KeyboardInterrupt: 

In [4]:
all_articles

[{'uuid': 'f21ee777-8ceb-4352-8ead-088f094078a6',
  'title': "S&P's Risk-On Momentum Gets Added Boost From Rebounding Profits",
  'description': 'With the quarterly reporting season roughly half over, US corporate earnings have largely lived up to Wall Street’s optimistic expectations even as…',
  'keywords': '',
  'snippet': "This advertisement has not loaded yet, but your article continues below.\n\nS&P's Risk-On Momentum Gets Added Boost From Rebounding Profits With the quarterly rep...",
  'url': 'https://financialpost.com/pmn/business-pmn/sps-risk-on-momentum-gets-added-boost-from-rebounding-profits',
  'image_url': 'https://smartcdn.gprod.postmedia.digital/financialpost/wp-content/uploads/2024/04/improving-cpi-ppi-spread-supports-sps-margin-outlook-though.jpg',
  'language': 'en',
  'published_at': '2024-04-27T12:50:04.000000Z',
  'source': 'financialpost.com',
  'relevance_score': None,
  'entities': [{'symbol': 'TSLA',
    'name': 'Tesla, Inc.',
    'exchange': None,
    'excha

In [5]:
import json

# Save to JSON
with open('tesla_articles.json', 'w') as f:
    json.dump(all_articles, f)


In [7]:
import pandas as pd 
# Load JSON data
with open('tesla_articles.json', 'r') as file:
    articles = json.load(file)

# Convert to DataFrame for easier manipulation and analysis
articles_df = pd.DataFrame(articles)
print(articles_df.head())
print(articles_df.describe())


                                   uuid  \
0  f21ee777-8ceb-4352-8ead-088f094078a6   
1  91bb4b80-a1db-465e-ab05-10978878a910   
2  88ceedc6-4582-4c58-9ee6-7aea4b6e1953   
3  06141a58-09c6-45a9-831b-0e41c471de49   
4  bf52e329-bc56-4c94-a04b-e17632a6cf8a   

                                               title  \
0  S&P's Risk-On Momentum Gets Added Boost From R...   
1  The "Magnificent Seven" Day of Reckoning Has A...   
2          Wall Street Breakfast: What Moved Markets   
3  1 "Magnificent Seven" Stock With 1,234% Upside...   
4  Market Today: Tech Giants Reignite AI Craze, A...   

                                         description  \
0  With the quarterly reporting season roughly ha...   
1  The Nasdaq Composite just posted its worst wee...   
2  The U.S. stocks rebounded this week amid posit...   
3  Tesla is the worst-performing stock in the "Ma...   
4  Weekly Stock Market OverviewThe stock market s...   

                                            keywords  \
0          

In [9]:
# Example of removing duplicates
articles_df.drop_duplicates(subset=['title'], inplace=True)

# Check for missing values
print(articles_df.isnull().sum())


uuid                 0
title                0
description          0
keywords             0
snippet              0
url                  0
image_url            0
language             0
published_at         0
source               0
relevance_score    176
entities             0
similar              0
dtype: int64


In [11]:
# Convert the date column to datetime
articles_df['published_at'] = pd.to_datetime(articles_df['published_at'])

# Find the last fetched date
last_fetched_date = articles_df['published_at'].max()
print("Last fetched date:", last_fetched_date)

Last fetched date: 2024-04-27 12:50:04+00:00


In [12]:
articles_df

Unnamed: 0,uuid,title,description,keywords,snippet,url,image_url,language,published_at,source,relevance_score,entities,similar
0,f21ee777-8ceb-4352-8ead-088f094078a6,S&P's Risk-On Momentum Gets Added Boost From R...,With the quarterly reporting season roughly ha...,,"This advertisement has not loaded yet, but you...",https://financialpost.com/pmn/business-pmn/sps...,https://smartcdn.gprod.postmedia.digital/finan...,en,2024-04-27 12:50:04+00:00,financialpost.com,,"[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...",[]
1,91bb4b80-a1db-465e-ab05-10978878a910,"The ""Magnificent Seven"" Day of Reckoning Has A...",The Nasdaq Composite just posted its worst wee...,,The Nasdaq Composite just posted its worst wee...,https://www.fool.com/investing/2024/04/27/the-...,https://g.foolcdn.com/editorial/images/773665/...,en,2024-04-27 12:10:00+00:00,finance.yahoo.com,,"[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...",[]
2,88ceedc6-4582-4c58-9ee6-7aea4b6e1953,Wall Street Breakfast: What Moved Markets,The U.S. stocks rebounded this week amid posit...,,What a difference a week can make! After posti...,https://seekingalpha.com/article/4686982-wall-...,https://static.seekingalpha.com/cdn/s3/uploads...,en,2024-04-27 10:39:22+00:00,seekingalpha.com,,"[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...",[]
3,06141a58-09c6-45a9-831b-0e41c471de49,"1 ""Magnificent Seven"" Stock With 1,234% Upside...","Tesla is the worst-performing stock in the ""Ma...",,"Tesla is the worst-performing stock in the ""Ma...",https://www.fool.com/investing/2024/04/27/1-ma...,https://g.foolcdn.com/editorial/images/774335/...,en,2024-04-27 10:16:00+00:00,finance.yahoo.com,,"[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...",[]
4,bf52e329-bc56-4c94-a04b-e17632a6cf8a,"Market Today: Tech Giants Reignite AI Craze, A...",Weekly Stock Market OverviewThe stock market s...,"GuruFocus, Article, News, GuruFocus Research, ...",Weekly Stock Market Overview\n\nThe stock mark...,https://www.gurufocus.com/news/2419941/market-...,https://static.gurufocus.com/17328035738566615...,en,2024-04-26 22:40:33+00:00,gurufocus.com,,"[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...",[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
174,dfa721c1-bdd6-4c9e-b79d-975d264c1ac2,Everyone said 'sell Tesla' ahead of earnings; ...,Everyone said 'sell Tesla' ahead of earnings; ...,,Investing.com — After a consolidation period a...,https://www.investing.com/news/stock-market-ne...,https://i-invdn-com.investing.com/news/LYNXNPE...,en,2024-04-24 11:53:17+00:00,investing.com,,"[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...",[]
175,9eed0a00-5837-44ca-a83d-274aebf771c9,Tesla stock targets slashed across Wall Street...,Tesla stock targets slashed across Wall Street...,,In the wake of Tesla (NASDAQ: )'s earnings rep...,https://www.investing.com/news/stock-market-ne...,https://i-invdn-com.investing.com/news/LYNXMPE...,en,2024-04-24 11:44:18+00:00,investing.com,,"[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...",[]
176,647dd010-db33-4fb3-ad7e-92ffbaa834a2,Meta’s $350 Billion Rally to Collide With Loft...,(Bloomberg) -- When it comes to social-media s...,,(Bloomberg) -- When it comes to social-media s...,https://finance.yahoo.com/news/meta-350-billio...,https://media.zenfs.com/en/bloomberg_markets_8...,en,2024-04-24 11:37:03+00:00,finance.yahoo.com,,"[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...",[]
177,e6e28a1b-4585-4625-ad78-e705dda02f4a,Tesla accelerates affordable car launch after ...,Tesla Inc. races to reignite demand with an am...,,Tesla Inc. races to reignite demand with an am...,https://www.biznews.com/global-investing/2024/...,https://www.biznews.com/wp-content/uploads/202...,en,2024-04-24 11:23:31+00:00,biznews.com,,"[{'symbol': 'TSLA', 'name': 'Tesla, Inc.', 'ex...",[]


In [13]:
from datetime import datetime, timedelta

# Manually set the start date to the day after the last successful fetch
last_successful_date = datetime(2017, 1, 31)  # This is the last date you know was completed
start_date = last_successful_date + timedelta(days=1)


In [14]:
import requests
import time

def fetch_articles(api_key, start_date, end_date):
    url = "https://api.marketaux.com/v1/news/all?symbols=TSLA&filter_entities=true&language=en&api_token=iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs"
    headers = {"x-api-key": api_key}
    params = {
        "tickers": "TSLA",
        "filter_entities": "true",
        "language": "en",
        "from": start_date.strftime('%Y-%m-%d'),
        "to": end_date.strftime('%Y-%m-%d'),
        "page": 1
    }
    articles = []

    while True:
        response = requests.get(url, headers=headers, params=params)
        if response.status_code == 200:
            data = response.json()['data']
            articles.extend(data)
            if len(data) < 3:
                break
            params['page'] += 1
        else:
            print(f"Failed to fetch data: {response.text}")
            time.sleep(60)  # Wait a minute before retrying
            continue  # Optionally retry the same date range

        if params['page'] > 100:  # Adjust based on your daily limit
            break
        
        time.sleep(10)  # Sleep to avoid hitting the rate limit

    return articles

def fetch_data_from_date(api_key, start_date):
    current_date = datetime.now()
    while start_date <= current_date:
        end_date = start_date + timedelta(days=30)
        if end_date > current_date:
            end_date = current_date

        print(f"Fetching from {start_date.date()} to {end_date.date()}")
        articles = fetch_articles(api_key, start_date, end_date)
        
        # Assuming you have a function to save articles
        save_articles_to_json(articles)  # Save articles after each successful fetch

        start_date += timedelta(days=31)  # Move to the next month
        if len(articles) == 0:
            break  # Break the loop if no articles are returned

# Example usage
api_key = "iy6rRX4oxFrouZocXr8JNpOzaxZLk3UvMfoMGxYs"
fetch_data_from_date(api_key, start_date)


Fetching from 2017-02-01 to 2017-03-03
Failed to fetch data: {"error":{"code":"usage_limit_reached","message":"The usage limit for this account has been reached."}}
Failed to fetch data: {"error":{"code":"usage_limit_reached","message":"The usage limit for this account has been reached."}}
Failed to fetch data: {"error":{"code":"usage_limit_reached","message":"The usage limit for this account has been reached."}}
Failed to fetch data: {"error":{"code":"usage_limit_reached","message":"The usage limit for this account has been reached."}}
Failed to fetch data: {"error":{"code":"usage_limit_reached","message":"The usage limit for this account has been reached."}}
Failed to fetch data: {"error":{"code":"usage_limit_reached","message":"The usage limit for this account has been reached."}}


KeyboardInterrupt: 

In [15]:
def save_articles_to_json(articles):
    with open('tesla_articles.json', 'a') as file:
        json.dump(articles, file)
