bohmian committed on
Commit
276a78e
1 Parent(s): 4574653

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -0
app.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from urllib.request import urlopen, Request
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+ import plotly
6
+ import plotly.express as px
7
+ import json # for graph plotting in website
8
+ # NLTK VADER for sentiment analysis
9
+ import nltk
10
+ nltk.downloader.download('vader_lexicon')
11
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
12
+
13
+ import subprocess
14
+ import os
15
+
16
+ import datetime
17
+
18
# Set the page title and select the wide layout for the Streamlit page.
st.set_page_config(
    page_title="Bohmian's Stock News Sentiment Analyzer",
    layout="wide",
)
19
+
20
+
21
def get_news(ticker):
    """Fetch the FinViz quote page for *ticker* and return its news table.

    Args:
        ticker: Stock symbol; it is appended to the module-level
            ``finviz_url`` to build the request URL.

    Returns:
        The element of the parsed page whose ``id`` is ``news-table``, or
        ``None`` when the page has no such element.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
    """
    # Function-scope import: the file's import block only brings in
    # ``urlopen`` and ``Request`` from urllib.
    from urllib.parse import quote

    # Percent-encode the user-typed symbol so a space or other reserved
    # character cannot produce a malformed request URL.
    url = finviz_url + quote(ticker, safe='')
    req = Request(
        url=url,
        # A browser-like User-Agent is sent with the request
        # (presumably the site rejects the urllib default; verify).
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'},
    )
    # The ``with`` block closes the HTTP response once the markup has been
    # parsed; the timeout keeps a stalled connection from hanging the app.
    with urlopen(req, timeout=15) as response:
        # Name the parser explicitly so the parse result does not depend on
        # which optional parsers happen to be installed.
        html = BeautifulSoup(response, 'html.parser')
    # Find 'news-table' in the soup and hand it back to the caller.
    return html.find(id='news-table')
30
+
31
+ # parse news into dataframe
32
def parse_news(news_table):
    """Convert the scraped news table into a DataFrame of headlines.

    Each ``tr`` row contributes one record: the headline is the text of the
    row's first link, and the row's first cell holds either ``"<date> <time>"``
    or just ``"<time>"``. A time-only row reuses the date of the closest
    preceding dated row.

    Args:
        news_table: Parsed table element exposing ``find_all('tr')``; every
            row exposes ``.a`` and ``.td`` (``None`` when absent).

    Returns:
        A DataFrame with the string columns ``date``, ``time`` and
        ``headline`` plus a parsed ``datetime`` column. The date literal
        ``"Today"`` is replaced by the current date (``YYYY-MM-DD``).

    Raises:
        AttributeError: If *news_table* is ``None`` (no table was found).
        ValueError: If a date/time pair cannot be parsed.
    """
    parsed_news = []
    today_string = datetime.datetime.today().strftime('%Y-%m-%d')
    # Date carried forward to time-only rows; None until a dated row is seen.
    date = None

    for row in news_table.find_all('tr'):
        # Rows without a link or without a leading cell carry no headline.
        if row.a is None or row.td is None:
            continue

        text = row.a.get_text()
        date_scrape = row.td.text.split()
        if not date_scrape:
            continue

        if len(date_scrape) == 1:
            # Only a time is present: keep the date from the previous row.
            time = date_scrape[0]
        else:
            date, time = date_scrape[0], date_scrape[1]

        # A time-only row that precedes every dated row has no usable date.
        if date is None:
            continue

        parsed_news.append([date, time, text])

    columns = ['date', 'time', 'headline']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
    parsed_news_df['date'] = parsed_news_df['date'].replace("Today", today_string)

    # Parse each stamp on its own: after the "Today" substitution the column
    # can mix two date layouts, and a single column-wide parse may lock onto
    # the layout of the first row and reject the others.
    stamps = parsed_news_df['date'] + ' ' + parsed_news_df['time']
    parsed_news_df['datetime'] = pd.to_datetime([pd.Timestamp(stamp) for stamp in stamps])

    return parsed_news_df
67
+
68
+
69
+
70
def score_news(parsed_news_df):
    """Attach VADER polarity scores to every headline.

    Args:
        parsed_news_df: DataFrame with at least the columns ``headline``,
            ``date``, ``time`` and ``datetime``.

    Returns:
        A DataFrame indexed by ``datetime`` that keeps ``headline`` and adds
        one column per key returned by ``polarity_scores``; the ``compound``
        column is renamed to ``sentiment_score``. The ``date`` and ``time``
        columns are dropped.
    """
    # Instantiate the sentiment intensity analyzer
    vader = SentimentIntensityAnalyzer()

    # One dict of polarity scores per headline
    scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()

    # Reuse the input's index so the join below pairs each score row with
    # its own headline even when the input index is not 0..n-1.
    scores_df = pd.DataFrame(scores, index=parsed_news_df.index)

    parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
    parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
    # ``columns=`` instead of a positional axis: newer pandas releases no
    # longer accept the axis as a positional argument to ``drop``.
    parsed_and_scored_news = parsed_and_scored_news.drop(columns=['date', 'time'])
    parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})

    return parsed_and_scored_news
87
+
88
+
89
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per hour.

    Args:
        parsed_and_scored_news: DataFrame with a datetime index and a numeric
            ``sentiment_score`` column.
        ticker: Symbol used as the prefix of the chart title.

    Returns:
        The Plotly figure; the caller is responsible for displaying it.
    """
    # Average only the numeric columns: the scored frame also carries the
    # text 'headline' column, which cannot be averaged.
    numeric_scores = parsed_and_scored_news.select_dtypes(include='number')

    # Bucket the rows into hourly bins and take the mean of each bin
    # (lower-case 'h' is the non-deprecated spelling of the hourly alias).
    mean_scores = numeric_scores.resample('h').mean()

    # Plot a bar chart with plotly
    fig = px.bar(
        mean_scores,
        x=mean_scores.index,
        y='sentiment_score',
        title=ticker + ' Hourly Sentiment Scores',
    )
    return fig
97
+
98
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per calendar day.

    Args:
        parsed_and_scored_news: DataFrame with a datetime index and a numeric
            ``sentiment_score`` column.
        ticker: Symbol used as the prefix of the chart title.

    Returns:
        The Plotly figure; the caller is responsible for displaying it.
    """
    # Average only the numeric columns: the scored frame also carries the
    # text 'headline' column, which cannot be averaged.
    numeric_scores = parsed_and_scored_news.select_dtypes(include='number')

    # Bucket the rows into daily bins and take the mean of each bin
    mean_scores = numeric_scores.resample('D').mean()

    # Plot a bar chart with plotly
    fig = px.bar(
        mean_scores,
        x=mean_scores.index,
        y='sentiment_score',
        title=ticker + ' Daily Sentiment Scores',
    )
    return fig
106
+
107
# Base FinViz quote URL; get_news appends the ticker symbol to it.
finviz_url = 'https://finviz.com/quote.ashx?t='
109
+
110
+
111
st.header("Bohmian's Stock News Sentiment Analyzer")

# Normalise the typed symbol: surrounding blanks are dropped and the symbol
# is upper-cased before it is used in the request and the titles.
ticker = st.text_input('Enter Stock Ticker', '').strip().upper()

PROMPT_MESSAGE = "Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter."

if not ticker:
    # Nothing typed yet: show the prompt without sending a request.
    st.write(PROMPT_MESSAGE)
else:
    try:
        st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
        news_table = get_news(ticker)
        parsed_news_df = parse_news(news_table)
        parsed_and_scored_news = score_news(parsed_news_df)
        fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
        fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

        st.plotly_chart(fig_hourly)
        st.plotly_chart(fig_daily)

        description = (
            "\n"
            "The above chart averages the sentiment scores of {} stock hourly and daily.\n"
            "The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.\n"
            "The news headlines are obtained from the FinViz website.\n"
            "Sentiments are given by the nltk.sentiment.vader Python library.\n"
        ).format(ticker)

        st.write(description)
        st.table(parsed_and_scored_news)

    except Exception as e:
        # Top-level boundary of the script: any failure while fetching,
        # parsing, scoring or plotting is printed to the process output and
        # the user is shown the prompt again.
        print(str(e))
        st.write(PROMPT_MESSAGE)

# CSS that hides the '#MainMenu' element and the page footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)