Spaces:
Sleeping
Sleeping
import datetime
import html
from urllib.parse import quote, urljoin
from urllib.request import urlopen, Request

import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from bs4 import BeautifulSoup
from dateutil import parser
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
st.set_page_config(page_title="Stock News Sentiment Analyzer", layout="wide")


@st.cache_resource
def load_model():
    """Build the FinBERT (yiyanghkust/finbert-tone) sentiment pipeline.

    Streamlit re-executes the whole script on every widget interaction, so
    the loader is wrapped in ``st.cache_resource``: the model and tokenizer
    are loaded once and the same pipeline object is handed back on later
    reruns instead of being rebuilt for every ticker lookup.

    Returns:
        A ``transformers`` "sentiment-analysis" pipeline backed by the
        three-class finbert-tone model and its tokenizer.
    """
    model = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
    tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
    return pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)


# Module-level handle used by score_news().
finbert = load_model()
def verify_link(url, timeout=10, retries=3):
    """Report whether *url* currently answers with a 2xx status.

    Args:
        url: Absolute http(s) URL to probe.
        timeout: Per-request timeout in seconds.
        retries: Maximum number of attempts before giving up.

    Returns:
        True as soon as one attempt yields a 2xx response (redirects are
        followed); False if every attempt fails, or if *url* is not an
        absolute http(s) URL.
    """
    # requests can only fetch absolute http(s) URLs. Anything else (None, an
    # empty href, a site-relative path) would just raise on every attempt,
    # so answer immediately instead of burning all the retries.
    if not isinstance(url, str) or not url.strip().lower().startswith(("http://", "https://")):
        return False

    for _ in range(retries):
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=True)
            if response.status_code in (405, 501):
                # The server does not implement HEAD. Fall back to a streamed
                # GET: only the status line and headers are read, and the
                # connection is released without downloading the body.
                response = requests.get(url, timeout=timeout, allow_redirects=True, stream=True)
                response.close()
            if 200 <= response.status_code < 300:
                return True
        except requests.RequestException:
            # Network hiccup or timeout: try again until retries run out.
            continue
    return False
def get_news(ticker, timeout=15):
    """Fetch the Finviz quote page for *ticker* and return its news table.

    Args:
        ticker: Stock symbol, e.g. ``"AAPL"``.
        timeout: Seconds to wait for Finviz before giving up.

    Returns:
        The BeautifulSoup element with ``id="news-table"``, or ``None`` when
        the ticker is blank, the request fails, or the page has no such
        element. Fetch errors are reported to the page with ``st.write``
        rather than raised.
    """
    # A blank symbol can never resolve to a quote page; skip the round trip.
    if not isinstance(ticker, str) or not ticker.strip():
        return None

    try:
        finviz_url = 'https://finviz.com/quote.ashx?t='
        # The ticker is free-form user input: quote it so characters such as
        # '&' or spaces cannot alter the query string.
        url = finviz_url + quote(ticker.strip(), safe='')
        req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0'})
        # The context manager closes the HTTP response once it is parsed.
        with urlopen(req, timeout=timeout) as response:
            soup = BeautifulSoup(response, 'html.parser')
        return soup.find(id='news-table')
    except Exception as e:
        st.write("Error fetching news:", str(e))
        return None
def parse_news(news_table):
    """Turn the Finviz news table into a DataFrame of headlines.

    Each ``<tr>`` is expected to hold a timestamp cell and an ``<a>`` with
    the headline. The timestamp cell contains either "<date> <time>" or
    just "<time>". A time-only row belongs to the most recent dated row
    above it (Finviz prints the date once per day; NOTE(review): based on
    the observed page layout), so the last seen date is carried forward
    instead of assuming today. The literal date "Today" is mapped to the
    current date.

    Args:
        news_table: BeautifulSoup element whose ``<tr>`` rows are headlines.

    Returns:
        DataFrame with columns ``datetime``, ``headline``, ``link`` and
        ``is_valid`` (result of ``verify_link``). Rows that cannot be parsed
        are logged with ``print`` and skipped.
    """
    base_url = 'https://finviz.com/'
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    # Fallback for a leading time-only row that has no dated row above it.
    current_date = today

    parsed_news = []
    for row in news_table.find_all('tr'):
        try:
            text = row.a.get_text()
            # Resolve site-relative hrefs; absolute URLs pass through as-is.
            link = urljoin(base_url, row.a['href'])
            stamp = row.td.text.strip().split()
            if len(stamp) == 1:
                time_part = stamp[0]
            else:
                current_date = today if stamp[0].lower() == 'today' else stamp[0]
                time_part = stamp[1]
            datetime_parsed = parser.parse(f"{current_date} {time_part}")
            is_valid = verify_link(link)
            parsed_news.append([datetime_parsed, text, link, is_valid])
        except Exception as e:
            # Best effort: spacer/ad rows without a link or timestamp are
            # expected, so log and move on rather than abort the whole table.
            print("Error parsing news:", e)
            continue

    columns = ['datetime', 'headline', 'link', 'is_valid']
    return pd.DataFrame(parsed_news, columns=columns)
def score_news(parsed_news_df, batch_size=10):
    """Attach FinBERT sentiment scores to every headline.

    Headlines are sent to the module-level ``finbert`` pipeline in batches.
    Each prediction becomes a signed score: ``+confidence`` for positive,
    ``-confidence`` for negative and ``0`` for anything else (neutral).

    The pipeline may report classes either as generic ids
    (``LABEL_0``/``LABEL_1``/``LABEL_2`` = neutral/positive/negative) or by
    name (``Neutral``/``Positive``/``Negative``), depending on the label map
    in the model config; both spellings are accepted, case-insensitively.

    Args:
        parsed_news_df: DataFrame with at least ``datetime`` and
            ``headline`` columns.
        batch_size: Number of headlines per pipeline call.

    Returns:
        The input rows indexed by ``datetime`` with three extra columns:
        ``sentiment_score``, ``label`` (as reported by the pipeline) and
        ``confidence``. The columns are present even for empty input.

    Raises:
        ValueError: If *batch_size* is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    positive_labels = {'label_1', 'positive'}
    negative_labels = {'label_2', 'negative'}

    # A clean 0..n-1 index keeps the column-wise concat below row-aligned.
    parsed_news_df = parsed_news_df.reset_index(drop=True)
    headlines = parsed_news_df['headline'].tolist()

    sentiment_scores = []
    for start in range(0, len(headlines), batch_size):
        for pred in finbert(headlines[start:start + batch_size]):
            label = pred['label']
            score = pred['score']
            key = str(label).lower()
            if key in positive_labels:
                sentiment_score = score
            elif key in negative_labels:
                sentiment_score = -score
            else:  # neutral
                sentiment_score = 0
            sentiment_scores.append({
                'sentiment_score': sentiment_score,
                'label': label,
                'confidence': score,
            })

    # Explicit columns so an empty result still has the expected schema.
    scores_df = pd.DataFrame(
        sentiment_scores, columns=['sentiment_score', 'label', 'confidence']
    )
    parsed_and_scored_news = pd.concat([parsed_news_df, scores_df], axis=1)
    return parsed_and_scored_news.set_index('datetime')
def _plot_mean_sentiment(parsed_and_scored_news, ticker, freq, period_label):
    """Bar-chart the mean sentiment score per resampling period.

    Args:
        parsed_and_scored_news: DataFrame indexed by datetime with a
            ``sentiment_score`` column.
        ticker: Symbol shown in the chart title.
        freq: Resampling rule passed to ``resample`` (e.g. ``'h'``, ``'D'``).
        period_label: Word used in the title ("Hourly", "Daily").

    Returns:
        A Plotly figure coloured red-yellow-green over the range [-1, 1].
    """
    mean_scores = parsed_and_scored_news['sentiment_score'].resample(freq).mean()
    fig = px.bar(
        mean_scores,
        x=mean_scores.index,
        y='sentiment_score',
        title=f'{ticker} {period_label} Sentiment Scores',
        color='sentiment_score',
        color_continuous_scale=['red', 'yellow', 'green'],
        # Fixed range so colours mean the same thing across tickers.
        range_color=[-1, 1],
    )
    fig.update_layout(coloraxis_colorbar=dict(
        title="Sentiment",
        tickvals=[-1, 0, 1],
        ticktext=["Negative", "Neutral", "Positive"],
    ))
    return fig


def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Return a bar chart of the mean sentiment score per hour."""
    return _plot_mean_sentiment(parsed_and_scored_news, ticker, 'h', 'Hourly')


def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Return a bar chart of the mean sentiment score per day."""
    return _plot_mean_sentiment(parsed_and_scored_news, ticker, 'D', 'Daily')
def get_recommendation(sentiment_scores):
    """Summarise the average sentiment as a plain-language recommendation.

    Args:
        sentiment_scores: DataFrame with a ``sentiment_score`` column.

    Returns:
        A sentence classifying the mean score as positive (>= 0.3),
        negative (<= -0.3) or neutral, with the score to two decimals.
        When there are no usable scores, a message saying so is returned
        instead of a "Score: nan" verdict.
    """
    scores = sentiment_scores['sentiment_score']
    avg_sentiment = scores.mean() if len(scores) else float('nan')

    # NaN compares False against both thresholds and would otherwise be
    # reported as a neutral verdict.
    if pd.isna(avg_sentiment):
        return "No sentiment data available. Unable to generate a recommendation."

    if avg_sentiment >= 0.3:
        return f"Positive sentiment (Score: {avg_sentiment:.2f}). The recent news suggests a favorable outlook for this stock. Consider buying or holding if you already own it."
    if avg_sentiment <= -0.3:
        return f"Negative sentiment (Score: {avg_sentiment:.2f}). The recent news suggests caution. Consider selling or avoiding this stock for now."
    return f"Neutral sentiment (Score: {avg_sentiment:.2f}). The recent news doesn't show a strong bias. Consider holding if you own the stock, or watch for more definitive trends before making a decision."
st.header("Stock News Sentiment Analyzer (FinBERT)")

# Strip stray whitespace so " aapl " resolves to the same symbol as "AAPL".
ticker = st.text_input('Enter Stock Ticker', '').strip().upper()


def _link_cell(row):
    """Render one table cell: a validity badge that links to the article."""
    badge = "Valid✅" if row["is_valid"] else "Invalid❌"
    link = str(row["link"])
    # The table is injected as raw HTML and the href comes from scraped
    # markup: only link out to http(s) targets and escape the URL.
    if not link.lower().startswith(("http://", "https://")):
        return f"{badge} Link"
    return (
        f'<a href="{html.escape(link, quote=True)}" target="_blank" '
        f'rel="noopener noreferrer">{badge} Link</a>'
    )


if not ticker:
    # Nothing entered yet (e.g. first page load): prompt instead of scraping
    # Finviz with an empty symbol.
    st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")
else:
    try:
        st.subheader(f"Sentiment Analysis and Recommendation for {ticker} Stock")
        news_table = get_news(ticker)
        parsed_news_df = parse_news(news_table) if news_table is not None else None

        if parsed_news_df is None or parsed_news_df.empty:
            st.write("No news available or invalid ticker symbol.")
        else:
            parsed_and_scored_news = score_news(parsed_news_df)

            # Generate and display recommendation
            recommendation = get_recommendation(parsed_and_scored_news)
            st.write(recommendation)

            # Display a disclaimer
            st.warning("Disclaimer: This recommendation is based solely on recent news sentiment and should not be considered as financial advice. Always do your own research and consult with a qualified financial advisor before making investment decisions.")

            fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
            fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)
            st.plotly_chart(fig_hourly)
            st.plotly_chart(fig_daily)

            # The model name matches the one loaded in load_model().
            description = (
                "\n"
                f"The above charts average the sentiment scores of {ticker} stock hourly and daily.\n"
                "The table below shows recent headlines with their sentiment scores and classifications.\n"
                "The news headlines are obtained from the FinViz website.\n"
                "Sentiments are analyzed using the yiyanghkust/finbert-tone model, which is specifically trained for financial text.\n"
                "Links have been verified for validity.\n"
            )
            st.write(description)

            display_df = parsed_and_scored_news.drop(columns=['is_valid'])
            display_df['link'] = parsed_and_scored_news.apply(_link_cell, axis=1).to_numpy()
            # Headlines are scraped text; escape them because to_html() is
            # told not to, so that the link markup above survives.
            display_df['headline'] = display_df['headline'].map(lambda h: html.escape(str(h)))
            st.write(display_df.to_html(escape=False), unsafe_allow_html=True)
    except Exception as e:
        # Top-level boundary: keep the page alive, log the cause server-side.
        print(str(e))
        st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")

# Hide Streamlit's default menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)