fin_proj_docker_2

Runtime error

App Files Files Community

fin_proj_docker_2 / main.py

OfirMatzlawi

Update main.py

4e788f7 2 months ago

raw

history blame contribute delete

No virus

5.88 kB

	from fastapi import FastAPI
	import requests

	# from telegram import ChatAction
	import os
	from urllib.request import urlopen, Request
	from bs4 import BeautifulSoup
	import pandas as pd
	import json # for graph plotting in website



	import datetime

	import yfinance as yf
	from pandas_datareader import data as pdr
	from nixtlats import TimeGPT
	from nixtlats import NixtlaClient

	import numpy as np
	import seaborn as sns
	import sklearn.metrics as metrics
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import OneHotEncoder
	from sklearn.metrics import mean_absolute_error
	from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
	from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
	from sklearn.preprocessing import OneHotEncoder, LabelEncoder
	import xgboost as xgb


	# FastAPI application instance; the @app.get(...) decorators further down
	# register their handler functions on this object.
	app = FastAPI()


	@app.get("/")
	def read_root():
	    """Return a greeting that tells the caller how to request a forecast.

	    The only data endpoint registered in this file lives at
	    ``/ticker/{ticker}`` and returns a model forecast, so the message points
	    the user at that path instead of the bare URL root.

	    Returns:
	        dict: a single ``"message"`` key holding the usage hint.
	    """
	    return {
	        "message": (
	            "Hello, please append /ticker/<TICKER> to the URL "
	            "(e.g. /ticker/AAPL) to get the stock forecast."
	        )
	    }

	# XGBoost forecasting pipeline: download, feature engineering, split, train and predict



	def data_download(ticker: str):
	    """Download adjusted-close history for ``ticker`` and a fixed set of indices.

	    The requested ticker is upper-cased and fetched together with the VIX
	    family symbols and ``^FVX``/``^TNX``/``^TYX`` via ``yf.download``, starting
	    at 1994-01-01. Gaps are forward-filled, values are rounded to two
	    decimals, and the caret-prefixed symbols are renamed to plain column
	    names (``VIX``, ``VIX_Index``, ``VIX9D``, ``VIX3M``, ``VIX6M``, ``T5Y``,
	    ``T10Y``, ``T30Y``).

	    Args:
	        ticker: symbol of the instrument to download (case-insensitive).

	    Returns:
	        pandas.DataFrame indexed by ``Date`` with one column per symbol plus
	        the helper columns ``DDate``, ``Day``, ``Month`` and ``Year``.
	    """
	    ticker = ticker.upper()
	    # Define the list of tickers
	    index_list = [
	        ticker, '^VIX', '^VVIX', '^VIX9D', '^VIX3M', '^VIX6M', '^FVX', '^TNX', '^TYX'
	    ]

	    # Ask for unadjusted OHLC explicitly: recent yfinance releases default to
	    # auto_adjust=True, in which case the 'Adj Close' column is not returned.
	    data = yf.download(
	        index_list, start="1994-01-01", end=None, auto_adjust=False
	    )['Adj Close']
	    # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
	    data = data.ffill()
	    df = data.reset_index().round(2)
	    df = df.rename(columns={
	        '^VIX': 'VIX', '^VVIX': 'VIX_Index',
	        '^VIX9D': 'VIX9D', '^VIX3M': 'VIX3M', '^VIX6M': 'VIX6M',
	        '^FVX': 'T5Y', '^TNX': 'T10Y', '^TYX': 'T30Y'
	    })
	    df['DDate'] = df['Date']
	    # Parse the dates once and reuse the result for all calendar parts.
	    dates = pd.to_datetime(df['DDate'])
	    df['Day'] = dates.dt.day
	    df['Month'] = dates.dt.month
	    df['Year'] = dates.dt.year
	    df = df.set_index('Date')
	    return df



	def data_manipolation(df, ticker: str):
	    """Build the feature table used for training from the downloaded prices.

	    For each of ``T5Y``, ``T10Y``, ``T30Y`` and ``VIX`` the result contains:

	    * the raw column,
	    * ``<col>_p``      - daily difference, rounded to 2 decimals,
	    * ``<col>_c1``     - that difference divided by the previous value,
	      rounded to 4 decimals and multiplied by 100,
	    * ``<col>_MA30``   - 30-row rolling mean, rounded to 2 decimals,
	    * ``<col>/_MA30``  - value divided by its ``_MA30``, rounded to 4 decimals.

	    The last column is ``<TICKER>_c1``, computed the same way for the ticker.
	    Rows containing any missing value (the rolling-window warm-up and the
	    first diff) are dropped.

	    Only the columns that end up in the result are computed, and the work is
	    done on a copy, so the caller's DataFrame is left untouched.

	    Args:
	        df: DataFrame holding at least the columns ``T5Y``, ``T10Y``,
	            ``T30Y``, ``VIX`` and the upper-cased ticker.
	        ticker: symbol whose ``_c1`` column becomes the target
	            (case-insensitive).

	    Returns:
	        pandas.DataFrame with the columns described above, in that order.

	    Raises:
	        KeyError: if one of the required columns is missing from ``df``.
	    """
	    ticker = ticker.upper()
	    basic_cols = ['T5Y', 'T10Y', 'T30Y', 'VIX']

	    # Copy so the feature columns are never written into the caller's frame.
	    out = df[basic_cols].copy()

	    for col in basic_cols:
	        series = df[col]
	        daily_change = series.diff().round(2)
	        moving_avg = series.rolling(window=30).mean().round(2)

	        out[col + "_p"] = daily_change
	        out[col + "_c1"] = (daily_change / series.shift()).round(4) * 100
	        out[col + "_MA30"] = moving_avg
	        out[col + "/_MA30"] = (series / moving_avg).round(4)

	    # Target column: 1-row percentage change of the requested ticker.
	    price = df[ticker]
	    out[ticker + "_c1"] = (price.diff().round(2) / price.shift()).round(4) * 100

	    # Drop rows with missing values (rolling warm-up and first diff).
	    return out.dropna()

	def data_split_train_test(df, ticker: str, test_fraction: float = 0.3, gap: int = 30):
	    """Split the feature table chronologically into train and test parts.

	    The target is the ``<TICKER>_c1`` column; every other column is a
	    feature. The last ``int(test_fraction * len(df))`` rows form the test
	    window and everything before it is the training set. The first ``gap``
	    rows of the test window are then discarded, leaving a buffer between the
	    end of the training data and the start of the test data.

	    Args:
	        df: feature table containing the ``<TICKER>_c1`` column.
	        ticker: symbol identifying the target column (case-insensitive).
	        test_fraction: share of rows reserved for the test window.
	        gap: number of leading test rows to drop.

	    Returns:
	        tuple ``(Xtrain, ytrain, Xtest, ytest)``. ``Xtest``/``ytest`` are
	        empty when the test window has ``gap`` rows or fewer.
	    """
	    target = ticker.upper() + '_c1'
	    X = df.loc[:, df.columns != target]
	    y = df[target]

	    test_size = int(test_fraction * len(X))
	    split_at = len(X) - test_size

	    # Older rows train the model ...
	    Xtrain = X.iloc[:split_at]
	    ytrain = y.iloc[:split_at]

	    # ... the most recent rows are held out, minus the leading gap.
	    Xtest = X.iloc[split_at:].iloc[gap:]
	    ytest = y.iloc[split_at:].iloc[gap:]

	    return Xtrain, ytrain, Xtest, ytest

	def xgb_training_forecast(Xtrain, ytrain, Xtest, ytest):
	    """Fit an XGBoost regressor and predict for the last row of ``Xtest``.

	    The model is trained on ``Xtrain``/``ytrain`` only. ``ytest`` is accepted
	    for signature compatibility but is not used.

	    Args:
	        Xtrain: training features.
	        ytrain: training target.
	        Xtest: held-out features; only its final row is used.
	        ytest: held-out target (unused).

	    Returns:
	        The single predicted value for the last row of ``Xtest`` (element 0
	        of the array returned by ``model.predict``).

	    Raises:
	        IndexError: if ``Xtest`` has no rows.
	    """
	    # 'reg:squarederror' is the current name of the squared-error objective;
	    # the former spelling 'reg:linear' is a deprecated alias.
	    reg = xgb.XGBRegressor(
	        base_score=0.5,
	        booster='gbtree',
	        n_estimators=1000,
	        objective='reg:squarederror',
	        max_depth=3,
	        learning_rate=0.01,
	    )

	    model = reg.fit(Xtrain, ytrain)

	    # Most recent feature row, shaped (1, n_features) as predict expects.
	    X_init = Xtest.iloc[-1, :].to_numpy().reshape(1, -1)

	    prediction = model.predict(X_init)[0]

	    return prediction



	@app.get("/ticker/{ticker}")
	def read_item(ticker: str):
	    """Run the full pipeline for ``ticker`` and return the forecast.

	    Steps: download prices, build features, round them to two decimals,
	    split into train/test, train the XGBoost model and predict for the most
	    recent row.

	    Args:
	        ticker: symbol taken from the URL path.

	    Returns:
	        str: the forecast rounded to two decimals, serialized with
	        ``json.dumps``.
	    """
	    df = data_download(ticker)
	    df = data_manipolation(df, ticker)
	    df = df.round(2)
	    Xtrain, ytrain, Xtest, ytest = data_split_train_test(df, ticker)
	    forecast_value = xgb_training_forecast(Xtrain, ytrain, Xtest, ytest)
	    # Convert to a Python float BEFORE rounding: rounding a NumPy float32 and
	    # widening it afterwards exposes float32 representation noise
	    # (e.g. 0.13 -> 0.12999999523162842) in the serialized output.
	    return json.dumps(round(float(forecast_value), 2))

	class NumpyEncoder(json.JSONEncoder):
	    """JSON encoder that understands NumPy arrays and scalar types.

	    The checks use NumPy's abstract scalar base classes (``np.integer``,
	    ``np.floating``, ``np.complexfloating``) rather than listing concrete
	    types, so they cover every width and do not reference the ``np.float_``
	    and ``np.complex_`` aliases, which NumPy 2.0 removed.
	    """

	    def default(self, obj):
	        """Convert ``obj`` to a JSON-serializable Python object.

	        Returns a list for arrays, ``bool``/``int``/``float`` for the
	        matching NumPy scalars, and ``{"real": ..., "imag": ...}`` for
	        complex scalars. Anything else is delegated to the base class,
	        which raises ``TypeError``.
	        """
	        # Serialize numpy arrays as lists
	        if isinstance(obj, np.ndarray):
	            return obj.tolist()

	        # Convert numpy booleans to Python bool
	        if isinstance(obj, np.bool_):
	            return bool(obj)

	        # Convert numpy integers (signed and unsigned, any width) to Python int
	        if isinstance(obj, np.integer):
	            return int(obj)

	        # Convert numpy floats (any width) to Python float
	        if isinstance(obj, np.floating):
	            return float(obj)

	        # Convert numpy complex numbers to dict
	        if isinstance(obj, np.complexfloating):
	            return {"real": obj.real, "imag": obj.imag}

	        # Use the default encoder for other types
	        return super().default(obj)