Spaces:

Sanjeevl
/

StockSavvyFinal

Sleeping

File size: 5,489 Bytes

1a2a035

# !pip install transformers
from transformers import pipeline
from client import AlpacaNewsFetcher
from alpaca_trade_api import REST
import os
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
from datetime import date



class NewsSentimentAnalysis:
    """
    Sentiment analysis of news articles using the Transformers library.

    Attributes:
    - classifier (pipeline): 'sentiment-analysis' pipeline from Transformers.
    """

    def __init__(self):
        """
        Initializes the NewsSentimentAnalysis object.

        The pipeline is created for the 'sentiment-analysis' task without an
        explicit model argument.
        """
        self.classifier = pipeline('sentiment-analysis')

    def analyze_sentiment(self, news_article):
        """
        Analyzes the sentiment of a given news article.

        Args:
        - news_article (dict): Dictionary containing 'summary', 'title' and
          'timestamp' keys.

        Returns:
        - dict: The article's 'timestamp', 'title' and 'summary' unchanged,
          plus 'sentiment' holding the classifier output for the combined
          summary and title text.

        Raises:
        - KeyError: If one of the three required keys is missing.
        """
        summary = news_article['summary']
        title = news_article['title']
        timestamp = news_article['timestamp']

        # Join with a space so the last word of the summary and the first
        # word of the title are not fused together; empty or None parts are
        # skipped instead of breaking the concatenation.
        relevant_text = ' '.join(part for part in (summary, title) if part)

        # truncation=True keeps long articles within the model's maximum
        # input length instead of failing on them.
        sentiment_result = self.classifier(relevant_text, truncation=True)

        return {
            'timestamp': timestamp,
            'title': title,
            'summary': summary,
            'sentiment': sentiment_result,
        }

    def plot_sentiment_graph(self, sentiment_analysis_result):
        """
        Plots a pie chart of sentiment counts per calendar date.

        Args:
        - sentiment_analysis_result (list): Records (dicts) containing at
          least 'Timestamp' and 'Sentiment' keys.

        Returns:
        - The object returned by pandas' ``Series.plot.pie()`` (a matplotlib
          Axes), so the caller can show or save the figure.

        Raises:
        - ValueError: If no records are supplied.
        """
        frame = pd.DataFrame(sentiment_analysis_result)
        if frame.empty:
            raise ValueError('sentiment_analysis_result contains no records to plot')

        frame['Timestamp'] = pd.to_datetime(frame['Timestamp'])
        frame['Date'] = frame['Timestamp'].dt.date

        # One pie slice per (date, sentiment) pair, sized by article count.
        per_day_counts = frame.groupby(by='Date')['Sentiment'].value_counts()
        return per_day_counts.plot.pie()

    def get_dominant_sentiment(self, sentiment_analysis_result):
        """
        Returns the sentiment label that occurs most often across all records.

        Args:
        - sentiment_analysis_result (list): Records (dicts) containing at
          least a 'Sentiment' key. Other keys are ignored.

        Returns:
        - pandas.Series: Entries 'sentiment' (the winning label) and 'count'
          (how many records carry it). On a tie the alphabetically first
          label wins.

        Raises:
        - ValueError: If there are no records, or none has a sentiment value.
        - KeyError: If the records have no 'Sentiment' key.
        """
        frame = pd.DataFrame(sentiment_analysis_result)
        if frame.empty:
            raise ValueError('sentiment_analysis_result contains no records')

        # The overall winner does not depend on the date, so count labels
        # directly. Sorting by label makes tie-breaking deterministic.
        label_counts = frame['Sentiment'].value_counts().sort_index()
        if label_counts.empty:
            raise ValueError('sentiment_analysis_result contains no sentiment values')

        totals = pd.DataFrame({
            'sentiment': label_counts.index,
            'count': label_counts.to_numpy(),
        })

        # idxmax returns the first maximal row, i.e. the alphabetically first
        # label among equally frequent ones.
        return totals.loc[totals['count'].idxmax()]



# Script entry point: fetch AAPL news, score every article and report the
# sentiment label that occurs most often.
if __name__ == '__main__':
    # Load the Alpaca key and secret from the environment (a .env file is
    # read first, if present). A missing variable raises KeyError here.
    load_dotenv()
    api_key = os.environ["ALPACA_API_KEY"]
    api_secret = os.environ["ALPACA_SECRET"]

    # AlpacaNewsFetcher fetches news articles related to a specific stock
    # from the Alpaca API.
    news_fetcher = AlpacaNewsFetcher(api_key, api_secret)

    # Fetch news for AAPL from 2021-01-01 to 2021-12-31. Each article is
    # expected to be a dict with 'title', 'timestamp' and 'summary' keys,
    # which is what analyze_sentiment reads below.
    news_data = news_fetcher.fetch_news("AAPL", "2021-01-01", "2021-12-31")

    news_sentiment_analyzer = NewsSentimentAnalysis()
    analysis_result = []
    for article in news_data:
        sentiment_analysis_result = news_sentiment_analyzer.analyze_sentiment(article)

        # Keep only what is needed downstream: the timestamp, the article
        # text and the label of the first classifier result.
        analysis_result.append({
            'Timestamp': sentiment_analysis_result["timestamp"],
            'News- Title:Summary': sentiment_analysis_result["title"] + sentiment_analysis_result["summary"],
            'Sentiment': sentiment_analysis_result["sentiment"][0]['label'],
        })

    # Determine the dominant sentiment across all analysed articles. With no
    # articles there is nothing to aggregate, so report None instead of
    # failing inside the aggregation.
    if analysis_result:
        dominant_sentiment = news_sentiment_analyzer.get_dominant_sentiment(analysis_result)
        dominant_label = dominant_sentiment['sentiment']
    else:
        dominant_label = None

    final_result = {
        'Sentiment-analysis-result': analysis_result,
        'Dominant-sentiment': dominant_label,
    }

    print(final_result)