File size: 5,884 Bytes
7500af4
6253914
85da89b
 
6253914
 
 
 
 
0d9571b
6253914
 
 
7500af4
ae1a042
 
 
 
 
47ce08f
 
 
 
 
 
 
 
 
 
 
 
7500af4
 
 
 
 
75ee7e0
a265793
6253914
a265793
46ee02f
47ce08f
 
fce7cab
 
47ce08f
ff56974
47ce08f
 
5b60579
47ce08f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d2c83a
 
283f9d5
c845410
fce7cab
ff56974
47ce08f
5b60579
47ce08f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b60579
47ce08f
 
 
 
 
 
1d2c83a
fce7cab
ff56974
5b60579
 
47ce08f
 
1d2c83a
47ce08f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d2c83a
a265793
 
 
 
47ce08f
be3771b
47ce08f
be3771b
4e788f7
5672f82
4e788f7
5672f82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a265793
46ee02f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
from fastapi import FastAPI
import requests

# from telegram import ChatAction
import os
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import json  # for graph plotting in website



import datetime

import yfinance as yf
from pandas_datareader import data as pdr
from nixtlats import TimeGPT
from nixtlats import NixtlaClient

import numpy as np
import seaborn as sns
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import xgboost as xgb


app = FastAPI()


@app.get("/")
def read_root():
    """Root endpoint: returns a short usage hint for the API."""
    greeting = "Hello, Please type a ticker at the end of the URL to get the stock sentiment."
    return {"message": greeting}

# XGboost


 
def data_download(ticker: str):
    """Download adjusted-close prices for *ticker* and a fixed set of
    volatility/treasury-yield indexes, forward-fill gaps, and add
    calendar columns.

    Returns a DataFrame indexed by 'Date', with index tickers renamed to
    friendly names and extra 'DDate', 'Day', 'Month', 'Year' columns.
    """
    ticker = ticker.upper()
    # Target ticker plus the VIX family and 5/10/30-year treasury yields.
    index_list = [
        ticker, '^VIX', '^VVIX', '^VIX9D', '^VIX3M', '^VIX6M', '^FVX', '^TNX', '^TYX'
    ]

    # auto_adjust=False keeps the 'Adj Close' column in the result; newer
    # yfinance versions default to auto_adjust=True, which drops it and
    # would make this indexing raise a KeyError.
    data = yf.download(index_list, start="1994-01-01", end=None,
                       auto_adjust=False)['Adj Close']
    # .ffill() replaces the deprecated fillna(method='ffill').
    data = data.ffill()
    df = data.reset_index().round(2)
    df = df.rename(columns={
        '^VIX': 'VIX', '^VVIX': 'VIX_Index',
        '^VIX9D': 'VIX9D', '^VIX3M': 'VIX3M', '^VIX6M': 'VIX6M',
        '^FVX': 'T5Y', '^TNX': 'T10Y', '^TYX': 'T30Y'
    })
    # Keep a copy of the date and derive calendar parts from it; convert
    # to datetime once instead of three times.
    df['DDate'] = df['Date']
    dates = pd.to_datetime(df['DDate'])
    df['Day'] = dates.dt.day
    df['Month'] = dates.dt.month
    df['Year'] = dates.dt.year
    df = df.set_index('Date')
    return df


    
def data_manipolation(df,ticker: str):
    ticker = ticker.upper()
    # MA calculation for all columns 
    New_Names=[ticker,'VIX','VIX_Index','VIX9D','VIX3M','VIX6M','T5Y','T10Y','T30Y']
    for col in New_Names:
        df[col + "_MA30"] = df[col].rolling(window=30).mean().round(2)
        df[col + "/_MA30"] = (df[col]/df[col + "_MA30"]).round(4)
    # Identify numeric time series columns (assuming columns with numeric datatypes)
    numeric_cols = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
    # Filter columns to ensure there are at least 2 rows for time series analysis
    timeseries_cols = [col for col in numeric_cols if len(df) > 1]

    # Calculate daily changes and percentage changes for required intervals
    for col in timeseries_cols:
        # Calculate daily change and store in temporary variable
        daily_change = df[col].diff().round(2)

        # Store all computed changes in the DataFrame at once to minimize DataFrame modifications
        df[col + "_p"] = daily_change
        df[col + "_c1"] = (daily_change / df[col].shift()).round(4) * 100  # Optimized 1-day percentage change


    suffixes = ['_p', '_c1', '_MA30', '/_MA30']
    basic_cols = ['T5Y', 'T10Y', 'T30Y', 'VIX']
    to_keep = basic_cols + [f"{col}{suffix}" for col in basic_cols for suffix in suffixes]
    ticker_columns = [ticker + suffix for suffix in ['_c1']]
    to_keep.extend(ticker_columns)

    # Filter the DataFrame to keep only specified columns and drop rows with missing values
    df = df[to_keep].dropna()

    return df
       
def data_split_train_test(df, ticker: str):
    """Chronological train/test split of the feature frame.

    The most recent 30% of rows become the test set (minus its first 30
    rows, which are trimmed off), the rest is training data.  The target
    column is ``<TICKER>_c1``.

    Returns (Xtrain, ytrain, Xtest, ytest).
    """
    ticker = ticker.upper()
    target = ticker + '_c1'
    X = df.loc[:, df.columns != target]
    y = df[target]

    recent_data_size = int(0.3 * len(X))  # size of the held-out tail
    split = len(X) - recent_data_size

    Xtrain, ytrain = X.iloc[:split], y.iloc[:split]
    # Trim the first 30 test rows (rolling-window warm-up at the
    # train/test boundary).  NOTE(review): if recent_data_size <= 30 the
    # test set comes out empty — presumably callers always pass enough
    # history; worth confirming.
    Xtest, ytest = X.iloc[split:].iloc[30:], y.iloc[split:].iloc[30:]

    return Xtrain, ytrain, Xtest, ytest

def xgb_training_forecast(Xtrain, ytrain, Xtest, ytest):
    """Fit a gradient-boosted regressor on the training split and predict
    the target for the most recent row of the test split.

    ``ytest`` is accepted for interface compatibility but is unused here
    (no hold-out evaluation is performed).

    Returns the single predicted value (a numpy float scalar).
    """
    # 'reg:squarederror' is the current name of the squared-error
    # objective; 'reg:linear' is its deprecated alias (same loss, but it
    # warns and is rejected by recent XGBoost releases).
    reg = xgb.XGBRegressor(base_score=0.5, booster='gbtree',
                           n_estimators=1000,
                           objective='reg:squarederror',
                           max_depth=3,
                           learning_rate=0.01)

    model = reg.fit(Xtrain, ytrain)

    # Forecast from the latest available feature row.
    latest_features = Xtest.iloc[-1, :].to_numpy().reshape(1, -1)
    prediction = model.predict(latest_features)[0]

    return prediction



@app.get("/ticker/{ticker}")
def read_item(ticker: str):
    """Download data for *ticker*, train the XGBoost model, and return the
    next 1-day percentage-change forecast as a JSON-encoded string."""
    frame = data_manipolation(data_download(ticker), ticker)
    frame = frame.round(2)
    splits = data_split_train_test(frame, ticker)
    forecast_value = xgb_training_forecast(*splits)
    return json.dumps(forecast_value.round(2), cls=NumpyEncoder)

class NumpyEncoder(json.JSONEncoder):
    """Custom JSON encoder for numpy data types.

    Serializes arrays as lists, integer/float scalars as Python
    ``int``/``float``, and complex scalars as ``{"real": ..., "imag": ...}``.
    """

    def default(self, obj):
        # Serialize numpy arrays as (nested) lists.
        if isinstance(obj, np.ndarray):
            return obj.tolist()

        # The abstract scalar bases np.integer / np.floating /
        # np.complexfloating cover every sized variant; the old aliases
        # (np.int_, np.float_, np.complex_, ...) were removed in NumPy 2.0
        # and would raise AttributeError here.
        if isinstance(obj, np.integer):
            return int(obj)

        if isinstance(obj, np.floating):
            return float(obj)

        if isinstance(obj, np.complexfloating):
            return {"real": float(obj.real), "imag": float(obj.imag)}

        # Fall back to the default encoder for everything else.
        return super().default(obj)