Spaces:

YaakovY
/

fin_proj_docker

Sleeping

File size: 4,021 Bytes

from fastapi import FastAPI
import requests

# from telegram import ChatAction
import os
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import json  # for graph plotting in website

# NLTK VADER for sentiment analysis
import nltk

# Fetch the "vader_lexicon" resource when this module is imported; presumably
# SentimentIntensityAnalyzer (imported just below) needs it — TODO confirm.
nltk.downloader.download("vader_lexicon")
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import subprocess
import os

import datetime

# FastAPI application instance; the route handlers in this module register
# themselves on it through the @app.get decorators.
app = FastAPI()


@app.get("/")
def read_root():
    """Return a short usage guide for the API.

    Returns:
        dict: a greeting under "message", the URL pattern under "format",
        and a concrete sample URL under "example".
    """
    usage = dict(
        message="Hello!!!, Please type a ticker at the end of the URL to get the stock sentiment.",
        format="https://yaakovy-fin-proj-docker.hf.space/ticker/[TICKER]",
        example="https://yaakovy-fin-proj-docker.hf.space/ticker/msft",
    )
    return usage


def get_news(ticker, timeout=10):
    """Download the Finviz quote page for ``ticker`` and return its news table.

    Args:
        ticker: Stock symbol appended to the module-level ``finviz_url``.
            It is percent-encoded first, because it arrives from the request
            path and must not be able to alter the query string.
        timeout: Seconds to wait on the network before ``urlopen`` gives up.
            Without a limit a stalled connection would block the request
            handler indefinitely.

    Returns:
        The element of the parsed page whose id is "news-table", or ``None``
        when the page contains no such element.

    Raises:
        urllib.error.URLError: If the page cannot be fetched (includes
            ``HTTPError`` for non-success status codes and timeouts).
    """
    # Local import so the block does not depend on a new file-level import.
    from urllib.parse import quote

    url = finviz_url + quote(ticker, safe="")
    req = Request(
        url=url,
        headers={
            # Browser-like agent string; presumably the site rejects the
            # default urllib agent — TODO confirm.
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0"
        },
    )
    # The context manager closes the connection even if parsing fails.
    with urlopen(req, timeout=timeout) as response:
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        html = BeautifulSoup(response, "html.parser")
    # Find 'news-table' in the Soup and hand it back to the caller
    return html.find(id="news-table")


# parse news into dataframe
def parse_news(news_table):
    """Turn a scraped news table into a DataFrame of headlines.

    Each ``tr`` row is expected to expose its headline link as ``row.a`` and
    its timestamp cell as ``row.td``. The cell text is either
    ``"<date> <time>"`` or just ``"<time>"``; a time-only row reuses the date
    of the closest preceding dated row.

    Args:
        news_table: Object providing ``find_all("tr")`` (in this module, the
            element returned by ``get_news``). ``None`` is treated as a table
            with no rows.

    Returns:
        pandas.DataFrame with the columns ``date``, ``time`` and ``headline``,
        one row per usable table row. The frame is empty (but still has the
        three columns) when nothing could be parsed. A ``date`` equal to
        "Today" is replaced by the current date formatted as YYYY-MM-DD.
        Rows that appear before any dated row keep ``None`` as their date.
    """
    columns = ["date", "time", "headline"]
    today_string = datetime.datetime.today().strftime("%Y-%m-%d")

    parsed_news = []
    # Carried across iterations: time-only rows inherit the last seen date.
    date = None

    rows = news_table.find_all("tr") if news_table is not None else []
    for row in rows:
        link = row.a
        cell = row.td
        # Rows without a headline link or a timestamp cell are skipped
        # explicitly instead of relying on a blanket exception handler.
        if link is None or cell is None:
            continue

        # split the text in the td tag into a list
        date_scrape = cell.text.split()
        if not date_scrape:
            continue

        if len(date_scrape) == 1:
            # only a time is present; keep the previously seen date
            time_text = date_scrape[0]
        else:
            # first element is the date, second the time
            date, time_text = date_scrape[0], date_scrape[1]

        # Append date, time and headline as a list to the 'parsed_news' list
        parsed_news.append([date, time_text, link.get_text()])

    # Build the frame once, after the loop, so it also exists for an empty table
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
    parsed_news_df["date"] = parsed_news_df["date"].replace("Today", today_string)
    # parsed_news_df["datetime"] = pd.to_datetime(
    #     parsed_news_df["date"] + " " + parsed_news_df["time"],
    #     format="%Y-%m-%d %H:%M",
    # )

    return parsed_news_df


def score_news(parsed_news_df):
    """Attach VADER polarity scores to every headline.

    Args:
        parsed_news_df: DataFrame holding at least the columns "headline",
            "date" and "time".

    Returns:
        A new DataFrame made of the input columns joined (by index) with the
        per-headline polarity scores, minus "date" and "time", and with the
        "compound" score column renamed to "sentiment_score".
    """
    analyzer = SentimentIntensityAnalyzer()

    # One dict of polarity scores per headline, in row order
    polarity_records = [
        analyzer.polarity_scores(headline)
        for headline in parsed_news_df["headline"]
    ]
    polarity_df = pd.DataFrame(polarity_records)

    # parsed_and_scored_news = parsed_and_scored_news.set_index("datetime")
    return (
        parsed_news_df.join(polarity_df, rsuffix="_right")
        .drop(columns=["date", "time"])
        .rename(columns={"compound": "sentiment_score"})
    )


# Base URL for extracting data from Finviz; get_news() appends the ticker
# symbol to it to form the quote-page address.
finviz_url = "https://finviz.com/quote.ashx?t="


def get_stock_data(ticker):
    """Run the full pipeline for one ticker: fetch, parse, then score.

    Args:
        ticker: Stock symbol handed to ``get_news``.

    Returns:
        Whatever ``score_news`` produces for the parsed headlines.
    """
    return score_news(parse_news(get_news(ticker)))


@app.get("/ticker/{ticker}")
def read_item(ticker: str):
    """Return the scored headlines for ``ticker`` as a JSON string.

    Args:
        ticker: Stock symbol taken from the URL path.

    Returns:
        str: the scored DataFrame serialized with ``to_json`` in "records"
        orientation.
    """
    # NOTE(review): the value returned here is already a JSON string, so the
    # framework will presumably encode it a second time as a string literal —
    # confirm whether clients rely on that before changing the return type.
    return get_stock_data(ticker).to_json(orient="records")