# %%
from dotenv import load_dotenv
from datetime import datetime, timedelta
import requests
import os
import time
import pandas as pd 
from textblob import TextBlob

# %%
def process_news_articles(news_articles):
    """Aggregate article sentiment into one mean score per (date, ticker).

    Each article's ``description`` is scored with TextBlob's sentiment
    polarity; the scores are then averaged over every article that shares
    the same publication date and ticker.

    Args:
        news_articles: A list of dictionaries (anything ``pd.DataFrame``
            accepts as records). Each record is read for the keys
            ``description``, ``published_utc`` and ``ticker``; any other
            keys are ignored.

    Returns:
        A DataFrame with columns ``date`` (``datetime.date`` objects taken
        from ``published_utc``), ``ticker`` and ``sentiment`` (mean
        polarity). It is empty, but has the same three columns, when there
        are no articles or none of them has a description.

    Raises:
        KeyError: If articles with a description are present but the
            ``published_utc`` or ``ticker`` column is missing.
    """
    output_columns = ['date', 'ticker', 'sentiment']

    df = pd.DataFrame(news_articles)

    # Nothing to score: return an empty frame with the normal schema rather
    # than failing on the missing 'description' column below.
    if df.empty or 'description' not in df.columns:
        return pd.DataFrame(columns=output_columns)

    # Articles without a description cannot be scored. Copy so the column
    # assignments below operate on an independent frame.
    df = df.dropna(subset=['description']).copy()
    if df.empty:
        return pd.DataFrame(columns=output_columns)

    # Sentiment analysis on descriptions
    df['sentiment'] = df['description'].apply(lambda text: TextBlob(text).sentiment.polarity)

    # Only the calendar date of publication is needed for the grouping.
    df['date'] = pd.to_datetime(df['published_utc']).dt.date

    # Columns other than date/ticker/sentiment do not reach the result, so
    # optional fields (e.g. 'amp_url', 'keywords') are neither required nor
    # filled in here.
    return df.groupby(['date', 'ticker'])['sentiment'].mean().reset_index(name='sentiment')

# %%
def exponential_moving_average(df, window):
    """Append an exponential moving average of ``sentiment`` to ``df``.

    The average is computed with ``span=window`` and ``adjust=False`` and
    stored in a new column named ``exp_mean_<window>_days``. The frame is
    modified in place and the same object is returned.

    Args:
        df: DataFrame containing a ``sentiment`` column.
        window: Span passed to ``Series.ewm``.

    Returns:
        The input ``df``, now carrying the additional EMA column.
    """
    smoothed = df['sentiment'].ewm(span=window, adjust=False).mean()
    target_column = f'exp_mean_{window}_days'
    df[target_column] = smoothed
    return df