StockSavvyFinal / sentiment_analysis / sentiment_analysis_pipeline.py
sanjeevl10
First Check-in
1a2a035
# !pip install transformers
from transformers import pipeline
from client import AlpacaNewsFetcher
from alpaca_trade_api import REST
import os
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
from datetime import date
class NewsSentimentAnalysis:
"""
A class for sentiment analysis of news articles using the Transformers library.
Attributes:
- classifier (pipeline): Sentiment analysis pipeline from Transformers.
"""
def __init__(self):
"""
Initializes the NewsSentimentAnalysis object.
"""
self.classifier = pipeline('sentiment-analysis')
def analyze_sentiment(self, news_article):
"""
Analyzes the sentiment of a given news article.
Args:
- news_article (dict): Dictionary containing 'summary', 'headline', and 'created_at' keys.
Returns:
- dict: A dictionary containing sentiment analysis results.
"""
summary = news_article['summary']
title = news_article['title']
timestamp = news_article['timestamp']
relevant_text = summary + title
sentiment_result = self.classifier(relevant_text)
analysis_result = {
'timestamp': timestamp,
'title': title,
'summary': summary,
'sentiment': sentiment_result
}
return analysis_result
def plot_sentiment_graph(self, sentiment_analysis_result):
"""
Plots a sentiment analysis graph
Args:
- sentiment_analysis_result): (dict): Dictionary containing 'summary', 'headline', and 'created_at' keys.
Returns:
- dict: A dictionary containing sentiment analysis results.
"""
df = pd.DataFrame(sentiment_analysis_result)
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df['Date'] = df['Timestamp'].dt.date
#Group by Date, sentiment value count
grouped = df.groupby(by='Date')['Sentiment'].value_counts()
grouped.plot.pie()
def get_dominant_sentiment (self, sentiment_analysis_result):
"""
Returns overall sentiment, negative or positive or neutral depending on the count of negative sentiment vs positive sentiment
Args:
- sentiment_analysis_result): (dict): Dictionary containing 'summary', 'headline', and 'created_at' keys.
Returns:
- dict: A dictionary containing sentiment analysis results.
"""
df = pd.DataFrame(sentiment_analysis_result)
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df['Date'] = df['Timestamp'].dt.date
#Group by Date, sentiment value count
grouped = df.groupby(by='Date')['Sentiment'].value_counts()
df = pd.DataFrame(list(grouped.items()), columns=['Sentiment', 'count'])
df['date'] = df['Sentiment'].apply(lambda x: x[0])
df['sentiment'] = df['Sentiment'].apply(lambda x: x[1])
df.drop('Sentiment', axis=1, inplace=True)
result = df.groupby('sentiment')['count'].sum().reset_index()
# Determine the sentiment with the most count
dominant_sentiment = result.loc[result['count'].idxmax()]
return dominant_sentiment
# Script entry point: fetch a year of AAPL news, score every article and
# print the per-article results together with the dominant sentiment label.
if __name__ == '__main__':
    # Alpaca credentials are read from the environment after load_dotenv() runs.
    load_dotenv()
    alpaca_key = os.environ["ALPACA_API_KEY"]
    alpaca_secret = os.environ["ALPACA_SECRET"]

    # News for AAPL from 2021-01-01 to 2021-12-31; each article is used below
    # as a dictionary.
    fetcher = AlpacaNewsFetcher(alpaca_key, alpaca_secret)
    news_data = fetcher.fetch_news("AAPL", "2021-01-01", "2021-12-31")

    analyzer = NewsSentimentAnalysis()

    # Keep only what the summary needs per article: the timestamp, the
    # concatenated title and summary text, and the label of the first
    # prediction.
    analysis_result = []
    for article in news_data:
        scored = analyzer.analyze_sentiment(article)
        analysis_result.append({
            'Timestamp': scored["timestamp"],
            'News- Title:Summary': scored["title"] + scored["summary"],
            'Sentiment': scored["sentiment"][0]['label'],
        })

    # Work out the dominant sentiment across all analysed articles.
    dominant_sentiment = analyzer.get_dominant_sentiment(analysis_result)

    final_result = {
        'Sentiment-analysis-result': analysis_result,
        'Dominant-sentiment': dominant_sentiment['sentiment'],
    }
    print(final_result)