# Page header captured together with this source (not part of the program):
# Spaces: Runtime error
from fastapi import FastAPI | |
import requests | |
# from telegram import ChatAction | |
import os | |
from urllib.request import urlopen, Request | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
import json # for graph plotting in website | |
import datetime | |
import yfinance as yf | |
from pandas_datareader import data as pdr | |
from nixtlats import TimeGPT | |
from nixtlats import NixtlaClient | |
import numpy as np | |
import seaborn as sns | |
import sklearn.metrics as metrics | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import OneHotEncoder | |
from sklearn.metrics import mean_absolute_error | |
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor | |
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV | |
from sklearn.preprocessing import OneHotEncoder, LabelEncoder | |
import xgboost as xgb | |
# Module-level FastAPI application instance.
# NOTE(review): no route decorators are visible on the handlers below —
# confirm they are registered on this object.
app = FastAPI()
def read_root():
    """Return the greeting payload that tells the user how to query a ticker.

    NOTE(review): no route decorator is visible on this function; presumably
    one was lost when the source was captured — confirm.
    """
    greeting = "Hello, Please type a ticker at the end of the URL to get the stock sentiment."
    return {"message": greeting}
# XGboost | |
def data_download(ticker: str):
    """Download adjusted closes for ``ticker`` plus volatility and yield indices.

    Args:
        ticker: Symbol to download; it is upper-cased before use.

    Returns:
        A DataFrame indexed by ``Date`` holding the forward-filled adjusted
        closes rounded to 2 decimals (index symbols renamed to ``VIX``,
        ``VIX_Index``, ``VIX9D``, ``VIX3M``, ``VIX6M``, ``T5Y``, ``T10Y``,
        ``T30Y``), plus ``DDate``, ``Day``, ``Month`` and ``Year`` columns.
    """
    ticker = ticker.upper()
    # The requested ticker together with the fixed set of index symbols.
    index_list = [
        ticker, '^VIX', '^VVIX', '^VIX9D', '^VIX3M', '^VIX6M', '^FVX', '^TNX', '^TYX'
    ]
    # auto_adjust=False is passed explicitly: when yfinance adjusts prices
    # itself it does not return an 'Adj Close' column, and the lookup below
    # would raise KeyError.
    data = yf.download(
        index_list, start="1994-01-01", end=None, auto_adjust=False
    )['Adj Close']
    # .ffill() replaces the deprecated fillna(method='ffill') spelling.
    data = data.ffill()
    df = data.reset_index().round(2)
    df = df.rename(columns={
        '^VIX': 'VIX', '^VVIX': 'VIX_Index',
        '^VIX9D': 'VIX9D', '^VIX3M': 'VIX3M', '^VIX6M': 'VIX6M',
        '^FVX': 'T5Y', '^TNX': 'T10Y', '^TYX': 'T30Y'
    })
    df['DDate'] = df['Date']
    # Parse the dates once and reuse the result for all calendar parts.
    dates = pd.to_datetime(df['DDate'])
    df['Day'] = dates.dt.day
    df['Month'] = dates.dt.month
    df['Year'] = dates.dt.year
    df = df.set_index('Date')
    return df
def data_manipolation(df, ticker: str):
    """Build the feature table used for training from the downloaded prices.

    For each of ``T5Y``, ``T10Y``, ``T30Y``, ``VIX`` and the ticker column
    the following series are derived:

    * ``<col>_MA30``  - 30-row rolling mean, rounded to 2 decimals
    * ``<col>/_MA30`` - value divided by that mean, rounded to 4 decimals
    * ``<col>_p``     - row-over-row difference, rounded to 2 decimals
    * ``<col>_c1``    - that difference over the previous value, rounded to
      4 decimals and multiplied by 100

    Args:
        df: DataFrame containing at least the four basic columns and a
            column named after the upper-cased ticker. It is not modified.
        ticker: Symbol whose ``<TICKER>_c1`` column is kept; upper-cased
            before use.

    Returns:
        A new DataFrame with the four basic columns, their four derived
        columns each, and ``<TICKER>_c1``, with rows containing missing
        values dropped.

    Raises:
        KeyError: If a required source column is absent from ``df``.
    """
    ticker = ticker.upper()
    basic_cols = ['T5Y', 'T10Y', 'T30Y', 'VIX']
    suffixes = ['_p', '_c1', '_MA30', '/_MA30']

    # Only these columns feed the returned frame. dict.fromkeys de-duplicates
    # while preserving order in case the ticker equals a basic column name.
    source_cols = list(dict.fromkeys(basic_cols + [ticker]))

    # Work on a copy so the caller's DataFrame is left untouched.
    features = df[source_cols].copy()

    for col in source_cols:
        series = df[col]
        moving_avg = series.rolling(window=30).mean().round(2)
        daily_change = series.diff().round(2)
        features[col + "_MA30"] = moving_avg
        features[col + "/_MA30"] = (series / moving_avg).round(4)
        features[col + "_p"] = daily_change
        # The percentage change is based on the already-rounded difference.
        features[col + "_c1"] = (daily_change / series.shift()).round(4) * 100

    to_keep = basic_cols + [f"{col}{suffix}" for col in basic_cols for suffix in suffixes]
    to_keep.append(ticker + '_c1')

    # Keep only the model columns and drop rows with missing values.
    return features[to_keep].dropna()
def data_split_train_test(df, ticker: str):
    """Split the feature table chronologically into train and test parts.

    The target is the ``<TICKER>_c1`` column; every other column is a
    feature. The last 30% of the rows form the test window, of which the
    first 30 rows are then discarded; the rows before the window are the
    training set. The size of the test window is printed.

    Args:
        df: Feature table containing the ``<TICKER>_c1`` column.
        ticker: Symbol naming the target column; upper-cased before use.

    Returns:
        Tuple ``(Xtrain, ytrain, Xtest, ytest)``.
    """
    target_col = ticker.upper() + '_c1'
    feature_mask = df.columns != target_col
    X = df.loc[:, feature_mask]
    y = df[target_col]

    # Number of most-recent rows reserved for testing (30% of the data).
    recent_data_size = int(0.3 * len(X))
    print (recent_data_size)

    # Position where the test window begins; everything before it is training data.
    split_at = len(X) - recent_data_size
    Xtrain, ytrain = X.iloc[:split_at], y.iloc[:split_at]

    # Take the test window and skip its first 30 rows.
    Xtest = X.iloc[split_at:].iloc[30:]
    ytest = y.iloc[split_at:].iloc[30:]
    return Xtrain, ytrain, Xtest, ytest
def xgb_training_forecast(Xtrain, ytrain, Xtest, ytest):
    """Fit an XGBoost regressor and predict from the last row of ``Xtest``.

    Args:
        Xtrain: Training features.
        ytrain: Training target.
        Xtest: Test features; only the final row is used as model input.
        ytest: Test target. Unused here; kept so the signature matches the
            tuple returned by the train/test split.

    Returns:
        The single predicted value for the last row of ``Xtest``.

    Raises:
        IndexError: If ``Xtest`` has no rows.
    """
    reg = xgb.XGBRegressor(
        base_score=0.5,
        booster='gbtree',
        n_estimators=1000,
        # 'reg:squarederror' is the current name of the squared-error
        # objective; 'reg:linear' is its deprecated alias.
        objective='reg:squarederror',
        max_depth=3,
        learning_rate=0.01,
    )
    model = reg.fit(Xtrain, ytrain)

    # Double brackets keep a one-row DataFrame, so the column names used in
    # training travel with the prediction input.
    latest_row = Xtest.iloc[[-1]]
    prediction = model.predict(latest_row)[0]
    return prediction
def read_item(ticker: str):
    """Run the full pipeline for ``ticker`` and return the forecast as JSON.

    Downloads the data, derives the features, rounds them to 2 decimals,
    splits them into train and test sets, trains the regressor and
    serialises the single predicted value.

    NOTE(review): no route decorator is visible on this function; presumably
    one was lost when the source was captured — confirm.

    Args:
        ticker: Symbol to forecast.

    Returns:
        A JSON string holding the forecast rounded to 2 decimals.
    """
    df = data_download(ticker)
    df = data_manipolation(df, ticker)
    df = df.round(2)
    Xtrain, ytrain, Xtest, ytest = data_split_train_test(df, ticker)
    forecast_value = xgb_training_forecast(Xtrain, ytrain, Xtest, ytest)
    # Convert to a Python float BEFORE rounding: rounding a float32 scalar
    # and widening it afterwards serialises values such as
    # 0.12999999523162842 instead of 0.13.
    return json.dumps(round(float(forecast_value), 2))
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that also understands NumPy arrays and scalars.

    The checks use NumPy's abstract scalar base classes (``np.integer``,
    ``np.floating``, ``np.complexfloating``) rather than listing concrete
    aliases, because ``np.float_`` and ``np.complex_`` no longer exist in
    NumPy 2.0 and referencing them raises ``AttributeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        Raises:
            TypeError: If ``obj`` is not a supported NumPy type (raised by
                the base encoder).
        """
        # Arrays become (nested) lists.
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # NumPy booleans become Python bool.
        if isinstance(obj, np.bool_):
            return bool(obj)
        # Any NumPy signed or unsigned integer becomes Python int.
        if isinstance(obj, np.integer):
            return int(obj)
        # Any NumPy floating-point scalar becomes Python float.
        if isinstance(obj, np.floating):
            return float(obj)
        # Complex scalars become a dict with their real and imaginary parts.
        if isinstance(obj, np.complexfloating):
            return {"real": float(obj.real), "imag": float(obj.imag)}
        # Everything else is delegated to the base class.
        return super().default(obj)