Spaces:
Runtime error
Runtime error
| from fastapi import FastAPI | |
| import requests | |
| # from telegram import ChatAction | |
| import os | |
| from urllib.request import urlopen, Request | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| import json # for graph plotting in website | |
| import datetime | |
| import yfinance as yf | |
| from pandas_datareader import data as pdr | |
| from nixtlats import TimeGPT | |
| from nixtlats import NixtlaClient | |
| import numpy as np | |
| import seaborn as sns | |
| import sklearn.metrics as metrics | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import OneHotEncoder | |
| from sklearn.metrics import mean_absolute_error | |
| from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor | |
| from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV | |
| from sklearn.preprocessing import OneHotEncoder, LabelEncoder | |
| import xgboost as xgb | |
| app = FastAPI() | |
def read_root():
    """Return the greeting payload that tells the user how to query the service.

    NOTE(review): no route decorator is visible on this function in the
    source as seen here -- confirm how it is registered on ``app``.
    """
    greeting = "Hello, Please type a ticker at the end of the URL to get the stock sentiment."
    return {"message": greeting}
| # XGBoost | |
def data_download(ticker: str):
    """Download adjusted closes for ``ticker`` plus volatility and yield indices.

    The ticker is upper-cased and fetched through ``yfinance`` together with
    the VIX family (^VIX, ^VVIX, ^VIX9D, ^VIX3M, ^VIX6M) and the Treasury
    yield indices (^FVX, ^TNX, ^TYX), starting at 1994-01-01.

    Args:
        ticker: Symbol of the instrument to download (case-insensitive).

    Returns:
        A DataFrame indexed by ``Date`` holding the forward-filled closes
        rounded to two decimals. The index columns are renamed to ``VIX``,
        ``VIX_Index`` (from ^VVIX), ``VIX9D``, ``VIX3M``, ``VIX6M``, ``T5Y``,
        ``T10Y`` and ``T30Y``; ``DDate``, ``Day``, ``Month`` and ``Year``
        calendar columns are added.
    """
    ticker = ticker.upper()
    # Define the list of tickers
    index_list = [
        ticker, '^VIX', '^VVIX', '^VIX9D', '^VIX3M', '^VIX6M', '^FVX', '^TNX', '^TYX'
    ]
    # auto_adjust=False is requested explicitly: when yfinance auto-adjusts
    # prices it does not return a separate 'Adj Close' column, and newer
    # releases auto-adjust by default.
    data = yf.download(
        index_list, start="1994-01-01", end=None, auto_adjust=False
    )['Adj Close']
    # Carry the last known value forward over gaps (the series do not all
    # have values on the same dates). ``ffill()`` replaces the deprecated
    # ``fillna(method='ffill')`` spelling.
    data = data.ffill()
    df = data.reset_index().round(2)
    df = df.rename(columns={
        '^VIX': 'VIX', '^VVIX': 'VIX_Index',
        '^VIX9D': 'VIX9D', '^VIX3M': 'VIX3M', '^VIX6M': 'VIX6M',
        '^FVX': 'T5Y', '^TNX': 'T10Y', '^TYX': 'T30Y'
    })
    df['DDate'] = df['Date']
    # Parse the dates once and derive all calendar parts from the same series.
    dates = pd.to_datetime(df['DDate'])
    df['Day'] = dates.dt.day
    df['Month'] = dates.dt.month
    df['Year'] = dates.dt.year
    df = df.set_index('Date')
    return df
def data_manipolation(df, ticker: str):
    """Build the model feature table from the downloaded price frame.

    For each of the base series ``T5Y``, ``T10Y``, ``T30Y`` and ``VIX`` the
    result holds the raw value plus four derived columns:

    * ``<col>_p``      -- day-over-day difference, rounded to 2 decimals
    * ``<col>_c1``     -- that difference divided by the previous value,
                          rounded to 4 decimals and multiplied by 100
    * ``<col>_MA30``   -- 30-row rolling mean, rounded to 2 decimals
    * ``<col>/_MA30``  -- value divided by its rolling mean, 4 decimals

    The last column, ``<TICKER>_c1``, is the same percentage change computed
    for the ticker itself.

    Only the columns that end up in the result are computed, and the input
    frame is left untouched (derived columns are written to a new frame).

    Args:
        df: Frame containing at least the ``T5Y``, ``T10Y``, ``T30Y``,
            ``VIX`` and upper-cased ticker columns.
        ticker: Ticker symbol (case-insensitive) naming the price column.

    Returns:
        A new DataFrame with the columns described above; rows with any
        missing value are dropped, which removes at least the first 29 rows
        because the 30-row rolling mean is undefined there.

    Raises:
        KeyError: If a required column is missing from ``df``.
    """
    ticker = ticker.upper()
    basic_cols = ['T5Y', 'T10Y', 'T30Y', 'VIX']

    missing = [col for col in basic_cols + [ticker] if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Work on a fresh frame so the caller's DataFrame is never modified.
    features = df[basic_cols].copy()

    # Columns are inserted in the order _p, _c1, _MA30, /_MA30 per base
    # series so the resulting layout stays stable for downstream consumers.
    for col in basic_cols:
        series = df[col]
        moving_avg = series.rolling(window=30).mean().round(2)
        daily_change = series.diff().round(2)
        features[col + "_p"] = daily_change
        features[col + "_c1"] = (daily_change / series.shift()).round(4) * 100
        features[col + "_MA30"] = moving_avg
        features[col + "/_MA30"] = (series / moving_avg).round(4)

    # One-day percentage change of the ticker itself.
    ticker_series = df[ticker]
    ticker_change = ticker_series.diff().round(2)
    features[ticker + "_c1"] = (ticker_change / ticker_series.shift()).round(4) * 100

    # Drop rows with missing values (warm-up of the rolling mean and diff).
    return features.dropna()
def data_split_train_test(df, ticker: str):
    """Split the feature table chronologically into train and test parts.

    The target is the ``<TICKER>_c1`` column; every other column is a
    feature. The most recent 30% of the rows (rounded down) form the test
    window, and the first 30 rows of that window are then discarded, leaving
    a gap between the end of the training data and the start of the test
    data.

    Args:
        df: Feature table that contains the ``<TICKER>_c1`` column.
        ticker: Ticker symbol (case-insensitive).

    Returns:
        The tuple ``(Xtrain, ytrain, Xtest, ytest)``.
    """
    target_col = ticker.upper() + '_c1'
    features = df.loc[:, df.columns != target_col]
    target = df[target_col]

    recent_data_size = int(0.3 * len(features))  # Adjust the percentage as needed
    print(recent_data_size)

    # Everything before the boundary is used for training.
    train_end = len(features) - recent_data_size
    Xtrain = features.iloc[:train_end]
    ytrain = target.iloc[:len(target) - recent_data_size]

    # The test window starts 30 rows after the boundary.
    test_start = train_end + 30
    Xtest = features.iloc[test_start:]
    ytest = target.iloc[test_start:]
    return Xtrain, ytrain, Xtest, ytest
def xgb_training_forecast(Xtrain, ytrain, Xtest, ytest):
    """Fit an XGBoost regressor and predict from the most recent test row.

    Args:
        Xtrain: Training features.
        ytrain: Training target.
        Xtest: Test features; only the last row is used as model input.
        ytest: Test target. Accepted for signature compatibility; it is not
            used by this function.

    Returns:
        The model's prediction for the last row of ``Xtest`` (a NumPy
        scalar).

    Raises:
        IndexError: If ``Xtest`` has no rows.
    """
    # Fail before the expensive fit when there is nothing to predict from.
    if len(Xtest) == 0:
        raise IndexError("Xtest is empty: no row available to forecast from")

    reg = xgb.XGBRegressor(
        base_score=0.5,
        booster='gbtree',
        n_estimators=1000,
        # 'reg:linear' is a deprecated alias of squared-error regression;
        # 'reg:squarederror' is the current name of the same objective.
        objective='reg:squarederror',
        max_depth=3,
        learning_rate=0.01,
    )
    model = reg.fit(Xtrain, ytrain)

    # Select the last row as a (1, n_features) array for a single prediction.
    X_init = Xtest.iloc[[-1]].to_numpy()
    prediction = model.predict(X_init)[0]
    return prediction
def read_item(ticker: str):
    """Run the full pipeline for ``ticker`` and return the forecast as JSON.

    Downloads the data, builds the features, splits them chronologically,
    trains the XGBoost model and predicts from the latest available row.

    NOTE(review): no route decorator is visible on this function in the
    source as seen here -- confirm how it is registered on ``app``.

    Args:
        ticker: Ticker symbol to forecast (case-insensitive).

    Returns:
        A JSON-formatted string containing the forecast rounded to two
        decimals.
    """
    df = data_download(ticker)
    df = data_manipolation(df, ticker)
    df = df.round(2)
    Xtrain, ytrain, Xtest, ytest = data_split_train_test(df, ticker)
    forecast_value = xgb_training_forecast(Xtrain, ytrain, Xtest, ytest)
    # Convert to a built-in float BEFORE rounding: rounding a NumPy float32
    # keeps it float32, and widening that to a Python float afterwards
    # re-exposes the binary noise (e.g. 1.2300000190734863 instead of 1.23).
    return json.dumps(round(float(forecast_value), 2))
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy arrays and scalars to built-in types.

    The scalar checks use NumPy's abstract scalar base classes
    (``np.integer``, ``np.floating``, ``np.complexfloating``) instead of
    listing concrete aliases, so every width is covered and the encoder does
    not depend on alias names such as ``np.float_`` / ``np.complex_`` that
    are not available in NumPy 2.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        Args:
            obj: The object the standard encoder could not serialize.

        Returns:
            A list for arrays, ``bool`` / ``int`` / ``float`` for NumPy
            scalars, or a ``{"real": ..., "imag": ...}`` dict for NumPy
            complex scalars.

        Raises:
            TypeError: If ``obj`` is not a supported NumPy type (raised by
                the base encoder).
        """
        # Serialize numpy arrays as (nested) lists
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # Convert numpy booleans to Python bool
        if isinstance(obj, np.bool_):
            return bool(obj)
        # Convert numpy integers (signed and unsigned, any width) to int
        if isinstance(obj, np.integer):
            return int(obj)
        # Convert numpy floats (any width) to Python float
        if isinstance(obj, np.floating):
            return float(obj)
        # Convert numpy complex numbers to a dict of plain floats
        if isinstance(obj, np.complexfloating):
            return {"real": float(obj.real), "imag": float(obj.imag)}
        # Let the base class raise TypeError for anything else
        return super().default(obj)