File size: 5,884 Bytes
7500af4
6253914
85da89b
 
6253914
 
 
 
 
0d9571b
6253914
 
 
7500af4
ae1a042
 
 
 
 
47ce08f
 
 
 
 
 
 
 
 
 
 
 
7500af4
 
 
 
 
75ee7e0
a265793
6253914
a265793
46ee02f
47ce08f
 
fce7cab
 
47ce08f
ff56974
47ce08f
 
5b60579
47ce08f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d2c83a
 
283f9d5
c845410
fce7cab
ff56974
47ce08f
5b60579
47ce08f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b60579
47ce08f
 
 
 
 
 
1d2c83a
fce7cab
ff56974
5b60579
 
47ce08f
 
1d2c83a
47ce08f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d2c83a
a265793
 
 
 
47ce08f
be3771b
47ce08f
be3771b
4e788f7
5672f82
4e788f7
5672f82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a265793
46ee02f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
from fastapi import FastAPI
import requests

# from telegram import ChatAction
import os
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import json  # for graph plotting in website



import datetime

import yfinance as yf
from pandas_datareader import data as pdr
from nixtlats import TimeGPT
from nixtlats import NixtlaClient

import numpy as np
import seaborn as sns
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import xgboost as xgb


app = FastAPI()


@app.get("/")
def read_root():
    """Root endpoint: returns a short usage hint for the API."""
    greeting = "Hello, Please type a ticker at the end of the URL to get the stock sentiment."
    return {"message": greeting}

# XGboost


 
def data_download(ticker: str):
    """Download adjusted-close prices for *ticker* and a fixed set of
    volatility/treasury-yield indexes, forward-fill gaps, and add
    calendar columns.

    Returns a DataFrame indexed by 'Date', with index tickers renamed to
    friendly names and extra 'DDate', 'Day', 'Month', 'Year' columns.
    """
    ticker = ticker.upper()
    # Target ticker plus the VIX family and 5/10/30-year treasury yields.
    index_list = [
        ticker, '^VIX', '^VVIX', '^VIX9D', '^VIX3M', '^VIX6M', '^FVX', '^TNX', '^TYX'
    ]

    # auto_adjust=False keeps the 'Adj Close' column in the result; newer
    # yfinance versions default to auto_adjust=True, which drops it and
    # would make this indexing raise a KeyError.
    data = yf.download(index_list, start="1994-01-01", end=None,
                       auto_adjust=False)['Adj Close']
    # .ffill() replaces the deprecated fillna(method='ffill').
    data = data.ffill()
    df = data.reset_index().round(2)
    df = df.rename(columns={
        '^VIX': 'VIX', '^VVIX': 'VIX_Index',
        '^VIX9D': 'VIX9D', '^VIX3M': 'VIX3M', '^VIX6M': 'VIX6M',
        '^FVX': 'T5Y', '^TNX': 'T10Y', '^TYX': 'T30Y'
    })
    # Keep a copy of the date and derive calendar parts from it; convert
    # to datetime once instead of three times.
    df['DDate'] = df['Date']
    dates = pd.to_datetime(df['DDate'])
    df['Day'] = dates.dt.day
    df['Month'] = dates.dt.month
    df['Year'] = dates.dt.year
    df = df.set_index('Date')
    return df


    
def data_manipolation(df,ticker: str):
    ticker = ticker.upper()
    # MA calculation for all columns 
    New_Names=[ticker,'VIX','VIX_Index','VIX9D','VIX3M','VIX6M','T5Y','T10Y','T30Y']
    for col in New_Names:
        df[col + "_MA30"] = df[col].rolling(window=30).mean().round(2)
        df[col + "/_MA30"] = (df[col]/df[col + "_MA30"]).round(4)
    # Identify numeric time series columns (assuming columns with numeric datatypes)
    numeric_cols = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
    # Filter columns to ensure there are at least 2 rows for time series analysis
    timeseries_cols = [col for col in numeric_cols if len(df) > 1]

    # Calculate daily changes and percentage changes for required intervals
    for col in timeseries_cols:
        # Calculate daily change and store in temporary variable
        daily_change = df[col].diff().round(2)

        # Store all computed changes in the DataFrame at once to minimize DataFrame modifications
        df[col + "_p"] = daily_change
        df[col + "_c1"] = (daily_change / df[col].shift()).round(4) * 100  # Optimized 1-day percentage change


    suffixes = ['_p', '_c1', '_MA30', '/_MA30']
    basic_cols = ['T5Y', 'T10Y', 'T30Y', 'VIX']
    to_keep = basic_cols + [f"{col}{suffix}" for col in basic_cols for suffix in suffixes]
    ticker_columns = [ticker + suffix for suffix in ['_c1']]
    to_keep.extend(ticker_columns)

    # Filter the DataFrame to keep only specified columns and drop rows with missing values
    df = df[to_keep].dropna()

    return df
       
def data_split_train_test(df, ticker: str):
    """Chronological train/test split of the feature frame.

    The most recent 30% of rows become the test set (minus its first 30
    rows, which are trimmed off), the rest is training data.  The target
    column is ``<TICKER>_c1``.

    Returns (Xtrain, ytrain, Xtest, ytest).
    """
    ticker = ticker.upper()
    target = ticker + '_c1'
    X = df.loc[:, df.columns != target]
    y = df[target]

    recent_data_size = int(0.3 * len(X))  # size of the held-out tail
    split = len(X) - recent_data_size

    Xtrain, ytrain = X.iloc[:split], y.iloc[:split]
    # Trim the first 30 test rows (rolling-window warm-up at the
    # train/test boundary).  NOTE(review): if recent_data_size <= 30 the
    # test set comes out empty — presumably callers always pass enough
    # history; worth confirming.
    Xtest, ytest = X.iloc[split:].iloc[30:], y.iloc[split:].iloc[30:]

    return Xtrain, ytrain, Xtest, ytest

def xgb_training_forecast(Xtrain, ytrain, Xtest, ytest):
    """Fit a gradient-boosted regressor on the training split and predict
    the target for the most recent row of the test split.

    ``ytest`` is accepted for interface compatibility but is unused here
    (no hold-out evaluation is performed).

    Returns the single predicted value (a numpy float scalar).
    """
    # 'reg:squarederror' is the current name of the squared-error
    # objective; 'reg:linear' is its deprecated alias (same loss, but it
    # warns and is rejected by recent XGBoost releases).
    reg = xgb.XGBRegressor(base_score=0.5, booster='gbtree',
                           n_estimators=1000,
                           objective='reg:squarederror',
                           max_depth=3,
                           learning_rate=0.01)

    model = reg.fit(Xtrain, ytrain)

    # Forecast from the latest available feature row.
    latest_features = Xtest.iloc[-1, :].to_numpy().reshape(1, -1)
    prediction = model.predict(latest_features)[0]

    return prediction



@app.get("/ticker/{ticker}")
def read_item(ticker: str):
    """Download data for *ticker*, train the XGBoost model, and return the
    next 1-day percentage-change forecast as a JSON-encoded string."""
    frame = data_manipolation(data_download(ticker), ticker)
    frame = frame.round(2)
    splits = data_split_train_test(frame, ticker)
    forecast_value = xgb_training_forecast(*splits)
    return json.dumps(forecast_value.round(2), cls=NumpyEncoder)

class NumpyEncoder(json.JSONEncoder):
    """Custom JSON encoder for numpy data types.

    Serializes arrays as lists, integer/float scalars as Python
    ``int``/``float``, and complex scalars as ``{"real": ..., "imag": ...}``.
    """

    def default(self, obj):
        # Serialize numpy arrays as (nested) lists.
        if isinstance(obj, np.ndarray):
            return obj.tolist()

        # The abstract scalar bases np.integer / np.floating /
        # np.complexfloating cover every sized variant; the old aliases
        # (np.int_, np.float_, np.complex_, ...) were removed in NumPy 2.0
        # and would raise AttributeError here.
        if isinstance(obj, np.integer):
            return int(obj)

        if isinstance(obj, np.floating):
            return float(obj)

        if isinstance(obj, np.complexfloating):
            return {"real": float(obj.real), "imag": float(obj.imag)}

        # Fall back to the default encoder for everything else.
        return super().default(obj)