from fastapi import FastAPI
import json
import numpy as np
import pandas as pd
import yfinance as yf
import xgboost as xgb
from sklearn.metrics import mean_absolute_error

app = FastAPI()


@app.get("/")
def read_root():
    return {
        "message": "Hello! Append a ticker to the URL (e.g. /ticker/AAPL) to get a next-day forecast for that stock."
    }


# XGBoost forecasting pipeline

def data_download(ticker: str) -> pd.DataFrame:
    """Download adjusted closes for the ticker plus volatility and Treasury-yield indices."""
    ticker = ticker.upper()
    # The requested ticker alongside CBOE volatility indices and Treasury yields
    index_list = [
        ticker, '^VIX', '^VVIX', '^VIX9D', '^VIX3M', '^VIX6M',
        '^FVX', '^TNX', '^TYX'
    ]
    # auto_adjust=False keeps the 'Adj Close' column on recent yfinance versions
    data = yf.download(index_list, start="1994-01-01", end=None, auto_adjust=False)['Adj Close']
    data = data.ffill()  # fillna(method='ffill') is deprecated in recent pandas
    df = data.reset_index().round(2)
    df = df.rename(columns={
        '^VIX': 'VIX',
        '^VVIX': 'VIX_Index',
        '^VIX9D': 'VIX9D',
        '^VIX3M': 'VIX3M',
        '^VIX6M': 'VIX6M',
        '^FVX': 'T5Y',
        '^TNX': 'T10Y',
        '^TYX': 'T30Y'
    })
    # Calendar columns derived from the trade date
    df['DDate'] = df['Date']
    df['Day'] = pd.to_datetime(df['DDate']).dt.day
    df['Month'] = pd.to_datetime(df['DDate']).dt.month
    df['Year'] = pd.to_datetime(df['DDate']).dt.year
    df = df.set_index('Date')
    return df


def data_manipulation(df: pd.DataFrame, ticker: str) -> pd.DataFrame:
    """Build 30-day moving-average, daily-change, and 1-day percentage-change features."""
    ticker = ticker.upper()
    # 30-day moving average and price-to-MA ratio for every series
    new_names = [ticker, 'VIX', 'VIX_Index', 'VIX9D', 'VIX3M', 'VIX6M', 'T5Y', 'T10Y', 'T30Y']
    for col in new_names:
        df[col + "_MA30"] = df[col].rolling(window=30).mean().round(2)
        df[col + "/_MA30"] = (df[col] / df[col + "_MA30"]).round(4)

    # Identify numeric time-series columns
    numeric_cols = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
    # Require at least two rows so diff()/shift() are meaningful
    timeseries_cols = [col for col in numeric_cols if len(df) > 1]

    # Daily change and 1-day percentage change for each series
    for col in timeseries_cols:
        daily_change = df[col].diff().round(2)
        df[col + "_p"] = daily_change
        df[col + "_c1"] = (daily_change / df[col].shift() * 100).round(4)  # 1-day % change

    # Keep the rate/volatility features plus the ticker's 1-day % change (the target)
    suffixes = ['_p', '_c1', '_MA30', '/_MA30']
    basic_cols = ['T5Y', 'T10Y', 'T30Y', 'VIX']
    to_keep = basic_cols + [f"{col}{suffix}" for col in basic_cols for suffix in suffixes]
    to_keep.append(ticker + '_c1')

    # Filter to the selected columns and drop rows with missing values
    df = df[to_keep].dropna()
    return df


def data_split_train_test(df: pd.DataFrame, ticker: str):
    """Chronological split: oldest 70% of rows for training, most recent 30% for testing."""
    ticker = ticker.upper()
    X = df.loc[:, df.columns != ticker + '_c1']
    y = df[ticker + '_c1']
    recent_data_size = int(0.3 * len(X))  # adjust the hold-out fraction as needed
    Xtrain = X.head(len(X) - recent_data_size)  # older data for training
    ytrain = y.head(len(y) - recent_data_size)
    Xtest = X.tail(recent_data_size)  # most recent data points for testing
    ytest = y.tail(recent_data_size)
    # Trim the first 30 test rows, mirroring the 30-day moving-average window
    Xtest = Xtest.iloc[30:]
    ytest = ytest.iloc[30:]
    return Xtrain, ytrain, Xtest, ytest
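
# Optional hold-out check -- a minimal sketch, not part of the original
# pipeline: `ytest` is produced by data_split_train_test but never scored, so
# this hypothetical helper shows one way to sanity-check a fitted model using
# the mean_absolute_error import above.
def evaluate_on_test(model, Xtest, ytest) -> float:
    """Mean absolute error of the model's 1-day %-change predictions on the hold-out tail."""
    preds = model.predict(Xtest)
    return mean_absolute_error(ytest, preds)
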
def xgb_training_forecast(Xtrain, ytrain, Xtest, ytest):
    """Fit an XGBoost regressor and predict the next 1-day % change from the latest feature row."""
    reg = xgb.XGBRegressor(
        base_score=0.5,
        booster='gbtree',
        n_estimators=1000,
        objective='reg:squarederror',  # 'reg:linear' is deprecated in current XGBoost
        max_depth=3,
        learning_rate=0.01,
    )
    model = reg.fit(Xtrain, ytrain)
    # Keep the last row as a one-row DataFrame so feature names still match the model
    X_init = Xtest.iloc[[-1]]
    prediction = model.predict(X_init)[0]
    return prediction


class NumpyEncoder(json.JSONEncoder):
    """Custom JSON encoder for numpy data types (numpy's abstract scalar
    bases cover the concrete int/float/complex types and also work on NumPy 2.0)."""

    def default(self, obj):
        # Serialize numpy arrays as lists
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # Convert numpy integers to Python int
        if isinstance(obj, np.integer):
            return int(obj)
        # Convert numpy floats to Python float
        if isinstance(obj, np.floating):
            return float(obj)
        # Convert numpy complex numbers to a dict of real/imag parts
        if isinstance(obj, np.complexfloating):
            return {"real": float(obj.real), "imag": float(obj.imag)}
        # Fall back to the default encoder for other types
        return json.JSONEncoder.default(self, obj)


@app.get("/ticker/{ticker}")
def read_item(ticker: str):
    df = data_download(ticker)
    df = data_manipulation(df, ticker)
    df = df.round(2)
    Xtrain, ytrain, Xtest, ytest = data_split_train_test(df, ticker)
    forecast_value = xgb_training_forecast(Xtrain, ytrain, Xtest, ytest)
    return json.dumps(forecast_value.round(2), cls=NumpyEncoder)
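
# Running locally -- a sketch, assuming this file is saved as main.py and
# uvicorn is installed (neither is stated above):
#
#   uvicorn main:app --reload
#
# then, for example:
#
#   curl http://127.0.0.1:8000/ticker/AAPL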