from fastapi import FastAPI
import requests
# from telegram import ChatAction
import os
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import json # for graph plotting in website
# NLTK VADER for sentiment analysis; the lexicon download runs once at startup
import nltk
nltk.downloader.download("vader_lexicon")
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import datetime

app = FastAPI()


@app.get("/")
def read_root():
    return {
        "message": "Hello! Append a ticker to the end of the URL to get the stock sentiment.",
        "format": "https://yaakovy-fin-proj-docker.hf.space/ticker/[TICKER]",
        "example": "https://yaakovy-fin-proj-docker.hf.space/ticker/msft",
    }


# Base URL for extracting news data from Finviz
finviz_url = "https://finviz.com/quote.ashx?t="


def get_news(ticker):
    url = finviz_url + ticker
    # A browser-like User-Agent header helps avoid Finviz blocking the request
    req = Request(
        url=url,
        headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0"
        },
    )
    response = urlopen(req)
    # Parse the response HTML; an explicit parser avoids BeautifulSoup's
    # "no parser was explicitly specified" warning
    html = BeautifulSoup(response, "html.parser")
    # Find 'news-table' in the soup and return it
    news_table = html.find(id="news-table")
    return news_table
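
# Illustrative usage (an assumption, not part of the app's request flow):
#
#     table = get_news("msft")
#     rows = table.find_all("tr") if table is not None else []
#
# The return value is the bs4 Tag for Finviz's <table id="news-table">,
# or None if the page layout changes or the request is blocked.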


# Parse the news table into a DataFrame
def parse_news(news_table):
    parsed_news = []
    today_string = datetime.datetime.today().strftime("%Y-%m-%d")
    # Fallback in case the first rows carry only a time and no date
    date = today_string
    for x in news_table.find_all("tr"):
        try:
            # Read the headline text from the row's <a> tag
            text = x.a.get_text()
            # Split the text of the <td> tag into a list
            date_scrape = x.td.text.split()
            # If 'date_scrape' has one element, it holds only the time;
            # the row inherits the date of the previous row
            if len(date_scrape) == 1:
                time = date_scrape[0]
            # Otherwise the 1st element is the date and the 2nd is the time
            else:
                date = date_scrape[0]
                time = date_scrape[1]
            # Append date, time and headline as a list to 'parsed_news'
            parsed_news.append([date, time, text])
        except AttributeError:
            # Skip rows that lack an <a> or <td> tag
            pass
    # Set column names
    columns = ["date", "time", "headline"]
    # Convert the parsed_news list into a DataFrame
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
    # Finviz labels the current day's rows "Today"; replace with the date string
    parsed_news_df["date"] = parsed_news_df["date"].replace("Today", today_string)
    # parsed_news_df["datetime"] = pd.to_datetime(
    #     parsed_news_df["date"] + " " + parsed_news_df["time"],
    #     format="%Y-%m-%d %H:%M",
    # )
    return parsed_news_df
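
# Shape of the returned DataFrame (illustrative values, not real data):
#
#     date        time     headline
#     2024-01-05  10:30AM  "Some headline text"
#
# Rows whose cell held only a time inherit the date of the previous row,
# mirroring how Finviz groups several headlines under a single date.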


def score_news(parsed_news_df):
    # Instantiate the sentiment intensity analyzer
    vader = SentimentIntensityAnalyzer()
    # Iterate through the headlines and get the polarity scores using VADER
    scores = parsed_news_df["headline"].apply(vader.polarity_scores).tolist()
    # Convert the 'scores' list of dicts into a DataFrame
    scores_df = pd.DataFrame(scores)
    # Join the news DataFrame with the score columns
    parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix="_right")
    # parsed_and_scored_news = parsed_and_scored_news.set_index("datetime")
    parsed_and_scored_news = parsed_and_scored_news.drop(["date", "time"], axis=1)
    parsed_and_scored_news = parsed_and_scored_news.rename(
        columns={"compound": "sentiment_score"}
    )
    return parsed_and_scored_news
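
# polarity_scores returns a dict with 'neg', 'neu' and 'pos' proportions plus
# a normalized 'compound' score in [-1, 1], which the rename above exposes as
# 'sentiment_score'. A quick sanity check outside the app:
#
#     sia = SentimentIntensityAnalyzer()
#     sia.polarity_scores("Shares surge on record earnings")  # compound > 0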


def get_stock_data(ticker):
    news_table = get_news(ticker)
    parsed_news_df = parse_news(news_table)
    parsed_and_scored_news = score_news(parsed_news_df)
    return parsed_and_scored_news
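
# End-to-end sketch: get_stock_data("msft") yields a DataFrame with one row
# per Finviz headline, carrying 'headline' plus the VADER columns noted above.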
@app.get("/ticker/{ticker}")
def read_item(ticker: str):
stock_data = get_stock_data(ticker)
result = stock_data.to_json(orient="records")
return result
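
# To try the API locally (a sketch; the module name "main" is an assumption,
# and 7860 is simply the conventional Hugging Face Spaces port):
#
#     uvicorn main:app --host 0.0.0.0 --port 7860
#     curl http://localhost:7860/ticker/msft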