Spaces:
Running
Running
Akshayram1
committed on
Commit
•
10bfc4e
1
Parent(s):
073f2cc
Update app (3).py
Browse files- app (3).py +43 -36
app (3).py
CHANGED
@@ -6,6 +6,7 @@ import plotly
|
|
6 |
import plotly.express as px
|
7 |
import json # for graph plotting in website
|
8 |
# NLTK VADER for sentiment analysis
|
|
|
9 |
import nltk
|
10 |
nltk.downloader.download('vader_lexicon')
|
11 |
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
@@ -15,7 +16,7 @@ import os
|
|
15 |
|
16 |
import datetime
|
17 |
|
18 |
-
st.set_page_config(page_title = "
|
19 |
|
20 |
|
21 |
def get_news(ticker):
|
@@ -29,40 +30,41 @@ def get_news(ticker):
|
|
29 |
return news_table
|
30 |
|
31 |
# parse news into dataframe
|
|
|
|
|
32 |
def parse_news(news_table):
|
33 |
parsed_news = []
|
34 |
-
today_string = datetime.datetime.today().strftime('%Y-%m-%d')
|
35 |
|
36 |
for x in news_table.findAll('tr'):
|
37 |
try:
|
38 |
-
#
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
#
|
44 |
-
|
45 |
if len(date_scrape) == 1:
|
|
|
46 |
time = date_scrape[0]
|
47 |
-
|
48 |
-
# else load 'date' as the 1st element and 'time' as the second
|
49 |
else:
|
50 |
date = date_scrape[0]
|
51 |
time = date_scrape[1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
|
62 |
-
# Create a pandas datetime object from the strings in 'date' and 'time' column
|
63 |
-
parsed_news_df['date'] = parsed_news_df['date'].replace("Today", today_string)
|
64 |
-
parsed_news_df['datetime'] = pd.to_datetime(parsed_news_df['date'] + ' ' + parsed_news_df['time'])
|
65 |
-
|
66 |
return parsed_news_df
|
67 |
|
68 |
|
@@ -80,35 +82,40 @@ def score_news(parsed_news_df):
|
|
80 |
# Join the DataFrames of the news and the list of dicts
|
81 |
parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
|
82 |
parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
|
83 |
-
parsed_and_scored_news = parsed_and_scored_news.drop(['date', 'time'], 1)
|
84 |
parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
|
85 |
|
86 |
return parsed_and_scored_news
|
87 |
|
88 |
|
|
|
89 |
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
|
90 |
-
|
|
|
|
|
91 |
# Group by date and ticker columns from scored_news and calculate the mean
|
92 |
-
mean_scores =
|
93 |
|
94 |
-
# Plot a bar chart with
|
95 |
-
fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title
|
96 |
-
return fig
|
97 |
|
98 |
def plot_daily_sentiment(parsed_and_scored_news, ticker):
|
99 |
-
|
|
|
|
|
100 |
# Group by date and ticker columns from scored_news and calculate the mean
|
101 |
-
mean_scores =
|
|
|
|
|
|
|
|
|
102 |
|
103 |
-
# Plot a bar chart with plotly
|
104 |
-
fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Daily Sentiment Scores')
|
105 |
-
return fig # instead of using fig.show(), we return fig and turn it into a graphjson object for displaying in web page later
|
106 |
|
107 |
# for extracting data from finviz
|
108 |
finviz_url = 'https://finviz.com/quote.ashx?t='
|
109 |
|
110 |
|
111 |
-
st.header("
|
112 |
|
113 |
ticker = st.text_input('Enter Stock Ticker', '').upper()
|
114 |
|
|
|
6 |
import plotly.express as px
|
7 |
import json # for graph plotting in website
|
8 |
# NLTK VADER for sentiment analysis
|
9 |
+
from dateutil import parser
|
10 |
import nltk
|
11 |
nltk.downloader.download('vader_lexicon')
|
12 |
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
|
|
16 |
|
17 |
import datetime
|
18 |
|
19 |
+
# Configure the Streamlit page: set the page title and use the wide layout.
st.set_page_config(page_title = "Stock News Sentiment Analyzer", layout = "wide")
|
20 |
|
21 |
|
22 |
def get_news(ticker):
|
|
|
30 |
return news_table
|
31 |
|
32 |
# parse news into dataframe
|
33 |
+
|
34 |
+
|
35 |
def parse_news(news_table):
    """Convert a scraped news table into a DataFrame of timestamped headlines.

    Every ``<tr>`` row is expected to carry the headline in its first ``<a>``
    tag and a timestamp in its first ``<td>`` tag. The timestamp cell holds
    either ``"<date> <time>"`` or only ``"<time>"``. A time-only row reuses
    the date of the closest preceding row that was parsed with a date (today
    is used until such a row is seen), and the literal date ``"Today"`` is
    replaced with the current date before parsing.

    Rows that cannot be parsed (no link, no cell, unparseable timestamp) are
    reported on stdout and skipped.

    Args:
        news_table: Parsed HTML element exposing ``findAll('tr')``
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Returns:
        pandas.DataFrame with the columns ``datetime`` and ``headline``,
        one row per successfully parsed headline.
    """
    today_string = datetime.datetime.today().strftime('%Y-%m-%d')
    # Date inherited by time-only rows; only updated after a successful parse
    # so that a malformed row cannot poison the rows that follow it.
    current_date = today_string
    parsed_news = []

    for row in news_table.findAll('tr'):
        try:
            headline = row.a.get_text()
            stamp_parts = row.td.text.strip().split()

            if len(stamp_parts) == 1:
                # Time only: the headline belongs to the last date seen.
                date_str = current_date
                time_str = stamp_parts[0]
            else:
                date_str = stamp_parts[0]
                time_str = stamp_parts[1]
                if date_str.casefold() == "today":
                    # "Today" is not a parseable date; substitute the real one.
                    date_str = today_string

            timestamp = parser.parse(f"{date_str} {time_str}")
        except (AttributeError, IndexError, TypeError, ValueError, OverflowError) as e:
            # AttributeError: row without <a>/<td>; IndexError: empty cell;
            # the rest: timestamp text that dateutil cannot interpret.
            print("Error parsing news:", e)
            continue

        current_date = date_str
        parsed_news.append([timestamp, headline])

    columns = ['datetime', 'headline']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)

    return parsed_news_df
|
69 |
|
70 |
|
|
|
82 |
# Join the DataFrames of the news and the list of dicts
|
83 |
parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
|
84 |
parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
|
|
|
85 |
parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
|
86 |
|
87 |
return parsed_and_scored_news
|
88 |
|
89 |
|
90 |
+
|
91 |
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per hour.

    Only the ``float64``/``int64`` columns of the input are kept, so text
    columns do not take part in the averaging. The frame is resampled into
    hourly bins (this relies on a datetime-like index -- confirm against
    the caller) and each bin is averaged.

    Args:
        parsed_and_scored_news: DataFrame containing a ``sentiment_score``
            column.
        ticker: Ticker symbol prepended to the chart title.

    Returns:
        The Plotly figure; the caller is responsible for displaying it.
    """
    hourly_means = (
        parsed_and_scored_news
        .select_dtypes(include=['float64', 'int64'])
        .resample('h')
        .mean()
    )
    chart_title = ticker + ' Hourly Sentiment Scores'
    return px.bar(
        hourly_means,
        x=hourly_means.index,
        y='sentiment_score',
        title=chart_title,
    )
|
101 |
|
102 |
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per calendar day.

    Only the ``float64``/``int64`` columns of the input are kept, so text
    columns do not take part in the averaging. The frame is resampled into
    daily bins (this relies on a datetime-like index -- confirm against
    the caller) and each bin is averaged.

    Args:
        parsed_and_scored_news: DataFrame containing a ``sentiment_score``
            column.
        ticker: Ticker symbol prepended to the chart title.

    Returns:
        The Plotly figure; the caller is responsible for displaying it.
    """
    daily_means = (
        parsed_and_scored_news
        .select_dtypes(include=['float64', 'int64'])
        .resample('D')
        .mean()
    )
    chart_title = ticker + ' Daily Sentiment Scores'
    return px.bar(
        daily_means,
        x=daily_means.index,
        y='sentiment_score',
        title=chart_title,
    )
|
112 |
|
|
|
|
|
|
|
113 |
|
114 |
# for extracting data from finviz
|
115 |
# Base URL of the finviz quote page. It ends with the `t=` query key, so
# presumably the ticker symbol is appended by the code that fetches the page
# (not visible here -- confirm).
finviz_url = 'https://finviz.com/quote.ashx?t='
|
116 |
|
117 |
|
118 |
+
# Render the page heading.
st.header("Stock News Sentiment Analyzer")
|
119 |
|
120 |
# Text box for the ticker symbol (empty by default); the entered value is
# upper-cased before use.
ticker = st.text_input('Enter Stock Ticker', '').upper()
|
121 |
|