File size: 4,021 Bytes
7500af4
6253914
85da89b
 
6253914
 
 
 
 
0d9571b
6253914
 
 
 
 
 
 
 
 
 
7500af4
 
 
 
 
 
75ee7e0
9cf7736
75ee7e0
 
 
4d42386
 
6253914
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b8c03a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from fastapi import FastAPI
import requests

# from telegram import ChatAction
import os
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import json  # for graph plotting in website

# NLTK VADER for sentiment analysis
import nltk

nltk.downloader.download("vader_lexicon")
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import subprocess
import os

import datetime

# Application object; the @app.get(...) decorators in this file register
# their route handlers on it.
app = FastAPI()


@app.get("/")
def read_root():
    """Return a short usage hint for the service root.

    The payload tells the caller how to build a ``/ticker/[TICKER]`` URL and
    gives one concrete example.
    """
    greeting = "Hello!!!, Please type a ticker at the end of the URL to get the stock sentiment."
    url_template = "https://yaakovy-fin-proj-docker.hf.space/ticker/[TICKER]"
    sample_url = "https://yaakovy-fin-proj-docker.hf.space/ticker/msft"

    usage = {}
    usage["message"] = greeting
    usage["format"] = url_template
    usage["example"] = sample_url
    return usage


def get_news(ticker):
    """Download the finviz quote page for *ticker* and return its news table.

    Args:
        ticker: Stock symbol; it is percent-encoded and appended to
            ``finviz_url``.

    Returns:
        The parsed element whose ``id`` is ``news-table``, or ``None`` when
        the page contains no such element.

    Raises:
        Whatever ``urlopen`` raises for network or HTTP failures; nothing is
        caught here.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import quote

    # Escape the symbol so characters such as '&' or '#' cannot change the
    # structure of the query string.
    url = finviz_url + quote(ticker, safe="")
    req = Request(
        url=url,
        headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0"
        },
    )
    # The context manager closes the HTTP response once parsing is done,
    # instead of leaving the connection open until garbage collection.
    with urlopen(req) as response:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        html = BeautifulSoup(response, "html.parser")
    # Find 'news-table' in the soup; this is None when the id is absent.
    return html.find(id="news-table")


# parse news into dataframe
def parse_news(news_table):
    """Turn the scraped news table into a DataFrame of headlines.

    Each ``tr`` row is expected to hold a link (the headline) and a ``td``
    whose text is either ``"<date> <time>"`` or just ``"<time>"``. A row that
    carries only a time reuses the date of the closest preceding dated row.

    Args:
        news_table: Object exposing ``find_all("tr")`` (the element returned
            by ``get_news``), or ``None`` when no table was found.

    Returns:
        A DataFrame with the columns ``date``, ``time`` and ``headline``.
        ``date`` values equal to ``"Today"`` are replaced by today's date in
        ``%Y-%m-%d`` form. The frame is empty (but still has the three
        columns) when ``news_table`` is ``None`` or has no usable rows.
    """
    columns = ["date", "time", "headline"]
    today_string = datetime.datetime.today().strftime("%Y-%m-%d")

    parsed_news = []
    # Date of the most recent row that carried one; time-only rows inherit it.
    current_date = None

    rows = news_table.find_all("tr") if news_table is not None else []
    for row in rows:
        link = row.a
        cell = row.td
        # Rows without a headline link or a timestamp cell carry no news item.
        if link is None or cell is None:
            continue

        date_scrape = cell.text.split()
        if not date_scrape:
            continue

        if len(date_scrape) == 1:
            # Only a time is present.
            time = date_scrape[0]
        else:
            # A date followed by a time.
            current_date = date_scrape[0]
            time = date_scrape[1]

        # A time-only row seen before any dated row cannot be dated; skip it.
        if current_date is None:
            continue

        parsed_news.append([current_date, time, link.get_text()])

    # Build the frame once, after all rows are collected, so it exists even
    # when there were no rows at all.
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
    parsed_news_df["date"] = parsed_news_df["date"].replace("Today", today_string)
    return parsed_news_df


def score_news(parsed_news_df):
    """Attach VADER polarity scores to every headline.

    Args:
        parsed_news_df: DataFrame with ``date``, ``time`` and ``headline``
            columns, as produced by ``parse_news``.

    Returns:
        A DataFrame holding ``headline`` plus the score columns returned by
        the analyzer, with ``compound`` renamed to ``sentiment_score``. The
        ``date`` and ``time`` columns are dropped.
    """
    analyzer = SentimentIntensityAnalyzer()

    # One dict of polarity scores per headline, in row order.
    polarity_rows = [
        analyzer.polarity_scores(headline) for headline in parsed_news_df["headline"]
    ]
    polarity_df = pd.DataFrame(polarity_rows)

    # Join on the index; the suffix only matters if a score column name
    # collides with an existing column.
    return (
        parsed_news_df.join(polarity_df, rsuffix="_right")
        .drop(columns=["date", "time"])
        .rename(columns={"compound": "sentiment_score"})
    )


# Base URL of the finviz quote page; get_news() appends the ticker symbol
# to it to build the page address.
finviz_url = "https://finviz.com/quote.ashx?t="


def get_stock_data(ticker):
    """Run the full pipeline for *ticker*: fetch, parse, then score.

    Args:
        ticker: Stock symbol passed to ``get_news``.

    Returns:
        The scored-headline DataFrame produced by ``score_news``.
    """
    return score_news(parse_news(get_news(ticker)))


@app.get("/ticker/{ticker}")
def read_item(ticker: str):
    """Return the scored headlines for *ticker* as a JSON-encoded string.

    Args:
        ticker: Stock symbol taken from the URL path.

    Returns:
        The records-oriented JSON text of the DataFrame from
        ``get_stock_data``.
    """
    # NOTE(review): this returns a str that already contains JSON; the
    # framework presumably JSON-encodes it again, so clients would receive a
    # quoted string rather than an array. Confirm before changing, since
    # existing clients may rely on the current shape.
    return get_stock_data(ticker).to_json(orient="records")