OfirMatzlawi's picture
Update main.py
4e788f7
raw
history blame contribute delete
No virus
5.88 kB
from fastapi import FastAPI
import requests
# from telegram import ChatAction
import os
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import json # for graph plotting in website
import datetime
import yfinance as yf
from pandas_datareader import data as pdr
from nixtlats import TimeGPT
from nixtlats import NixtlaClient
import numpy as np
import seaborn as sns
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import xgboost as xgb
# FastAPI application instance; the route handlers below register on it
# through the @app.get decorators.
app = FastAPI()
@app.get("/")
def read_root():
    """Return a short usage hint for clients hitting the root path."""
    greeting = "Hello, Please type a ticker at the end of the URL to get the stock sentiment."
    return {"message": greeting}
# XGBoost forecasting pipeline: download, feature engineering, split, train/predict
def data_download(ticker: str):
    """Download the 'Adj Close' series for *ticker* and a fixed set of index symbols.

    The ticker is upper-cased and fetched with yfinance together with
    ^VIX, ^VVIX, ^VIX9D, ^VIX3M, ^VIX6M, ^FVX, ^TNX and ^TYX, starting at
    1994-01-01. Gaps are forward-filled, values are rounded to two decimals
    and the index symbols are renamed to VIX, VIX_Index, VIX9D, VIX3M, VIX6M,
    T5Y, T10Y and T30Y respectively.

    Returns:
        A DataFrame indexed by 'Date' with one column per symbol plus the
        helper columns 'DDate' (copy of the date), 'Day', 'Month' and 'Year'.
    """
    ticker = ticker.upper()
    # Define the list of tickers
    index_list = [
        ticker, '^VIX', '^VVIX', '^VIX9D', '^VIX3M', '^VIX6M', '^FVX', '^TNX', '^TYX'
    ]
    # auto_adjust is pinned to False so the 'Adj Close' column is present even
    # on yfinance releases whose default adjusts prices and drops that column.
    data = yf.download(
        index_list, start="1994-01-01", end=None, auto_adjust=False
    )['Adj Close']
    # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
    data = data.ffill()
    df = data.reset_index().round(2)
    df = df.rename(columns={
        '^VIX': 'VIX', '^VVIX': 'VIX_Index',
        '^VIX9D': 'VIX9D', '^VIX3M': 'VIX3M', '^VIX6M': 'VIX6M',
        '^FVX': 'T5Y', '^TNX': 'T10Y', '^TYX': 'T30Y'
    })
    df['DDate'] = df['Date']
    # Parse the dates once and derive all calendar parts from the same series.
    dates = pd.to_datetime(df['DDate'])
    df['Day'] = dates.dt.day
    df['Month'] = dates.dt.month
    df['Year'] = dates.dt.year
    df = df.set_index('Date')
    return df
def data_manipolation(df, ticker: str):
    """Build the feature table used for training from the downloaded frame.

    For each of T5Y, T10Y, T30Y and VIX the result holds the raw level plus:
      * ``<col>_p``      - day-over-day difference, rounded to 2 decimals
      * ``<col>_c1``     - difference divided by the previous value, rounded
                           to 4 decimals and multiplied by 100
      * ``<col>_MA30``   - 30-row rolling mean, rounded to 2 decimals
      * ``<col>/_MA30``  - level divided by that rolling mean, 4 decimals
    The last column is ``<TICKER>_c1`` (same formula applied to the ticker).
    Rows containing any missing value are dropped.

    Only the columns that end up in the result are read and derived, and the
    work happens on a copy, so the frame passed in is left untouched.

    Args:
        df: Frame containing at least the columns T5Y, T10Y, T30Y, VIX and
            the upper-cased ticker.
        ticker: Ticker symbol; upper-cased before use.

    Returns:
        A new DataFrame with the columns described above.
    """
    ticker = ticker.upper()
    basic_cols = ['T5Y', 'T10Y', 'T30Y', 'VIX']
    suffixes = ['_p', '_c1', '_MA30', '/_MA30']

    # dict.fromkeys de-duplicates while keeping order, in case the ticker is
    # itself one of the basic columns.
    source_cols = list(dict.fromkeys(basic_cols + [ticker]))
    work = df[source_cols].copy()

    # Moving average and level/average ratio are only kept for the basic columns.
    for col in basic_cols:
        moving_avg = work[col].rolling(window=30).mean().round(2)
        work[col + "_MA30"] = moving_avg
        work[col + "/_MA30"] = (work[col] / moving_avg).round(4)

    # Daily change and 1-day percentage change for the basic columns and the ticker.
    for col in source_cols:
        daily_change = work[col].diff().round(2)
        work[col + "_p"] = daily_change
        work[col + "_c1"] = (daily_change / work[col].shift()).round(4) * 100

    to_keep = basic_cols + [f"{col}{suffix}" for col in basic_cols for suffix in suffixes]
    to_keep.append(ticker + '_c1')
    # Keep only the selected columns and drop rows with missing values
    # (the first 29 rows lack a 30-row moving average).
    return work[to_keep].dropna()
def data_split_train_test(df, ticker: str):
    """Split the feature table chronologically into train and test parts.

    The target is the ``<TICKER>_c1`` column; every other column is a
    feature. The last 30% of the rows (truncated to an int) form the test
    window and the rows before it form the training set. The first 30 rows
    of the test window are then discarded.

    Returns:
        The tuple ``(Xtrain, ytrain, Xtest, ytest)``.
    """
    target_col = ticker.upper() + '_c1'
    features = df.loc[:, df.columns != target_col]
    target = df[target_col]

    # Size of the most recent slice reserved for testing.
    holdout = int(0.3 * len(features))
    print(holdout)
    train_rows = len(features) - holdout

    Xtrain, ytrain = features.head(train_rows), target.head(train_rows)
    # Skip the first 30 rows of the hold-out window.
    Xtest = features.tail(holdout).iloc[30:]
    ytest = target.tail(holdout).iloc[30:]
    return Xtrain, ytrain, Xtest, ytest
def xgb_training_forecast(Xtrain, ytrain, Xtest, ytest):
    """Fit an XGBoost regressor and predict from the last row of *Xtest*.

    Args:
        Xtrain: Training features.
        ytrain: Training target.
        Xtest: Test features; only its last row is used, as the input of the
            single prediction.
        ytest: Unused; kept so the signature matches the tuple returned by
            ``data_split_train_test``.

    Returns:
        The model's prediction for the last row of *Xtest* (first element of
        the array returned by ``predict``).
    """
    # 'reg:squarederror' is the current name of the squared-error objective;
    # the former alias 'reg:linear' is deprecated.
    reg = xgb.XGBRegressor(base_score=0.5, booster='gbtree',
                           n_estimators=1000,
                           objective='reg:squarederror',
                           max_depth=3,
                           learning_rate=0.01)
    model = reg.fit(Xtrain, ytrain)
    # Reshape the last feature row to (1, n_features), i.e. a one-sample batch.
    last_data = Xtest.iloc[-1, :]
    X_init = last_data.to_numpy().reshape(1, -1)
    prediction = model.predict(X_init)[0]
    return prediction
@app.get("/ticker/{ticker}")
def read_item(ticker: str):
    """Run the full pipeline for *ticker* and return the forecast.

    Downloads the data, derives the features, splits them, trains the model
    and returns the prediction rounded to two decimals, serialized with
    ``json.dumps`` (so the handler's return value is a JSON-formatted string).
    """
    features = data_manipolation(data_download(ticker), ticker).round(2)
    splits = data_split_train_test(features, ticker)
    forecast_value = xgb_training_forecast(*splits)
    return json.dumps(forecast_value.round(2), cls=NumpyEncoder)
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values to plain Python equivalents.

    Type checks use NumPy's abstract scalar base classes (``np.integer``,
    ``np.floating``, ``np.complexfloating``) instead of enumerating concrete
    aliases, because ``np.float_`` and ``np.complex_`` no longer exist in
    NumPy 2.0 and referencing them raises ``AttributeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Raises:
            TypeError: from the base encoder, for types not handled here.
        """
        # Serialize numpy arrays as lists
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # Convert numpy booleans to Python bool
        if isinstance(obj, np.bool_):
            return bool(obj)
        # Convert numpy integers (signed and unsigned, any width) to Python int
        if isinstance(obj, np.integer):
            return int(obj)
        # Convert numpy floats (any width) to Python float
        if isinstance(obj, np.floating):
            return float(obj)
        # Convert numpy complex numbers to dict; the parts are themselves
        # numpy floats and are converted by a further pass through default()
        if isinstance(obj, np.complexfloating):
            return {"real": obj.real, "imag": obj.imag}
        # Use the default encoder for other types
        return super().default(obj)