OfirMatzlawi committed on
Commit
47ce08f
1 Parent(s): 5b90a23

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +102 -18
main.py CHANGED
@@ -17,6 +17,18 @@ from pandas_datareader import data as pdr
17
  from nixtlats import TimeGPT
18
  from nixtlats import NixtlaClient
19
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  app = FastAPI()
21
 
22
 
@@ -27,35 +39,107 @@ def read_root():
27
 
28
  }
29
 
30
- ## TimeGPT
31
-
32
-
33
def get_data_from_yahoo(ticker: str):
    """Fetch price history for ``ticker`` from Yahoo Finance starting 2019-01-01.

    Args:
        ticker: Symbol passed straight through to ``pdr.get_data_yahoo``.

    Returns:
        A DataFrame built from the downloaded data with its index reset into
        regular columns.
    """
    # Route pandas-datareader's Yahoo reader through yfinance before fetching.
    yf.pdr_override()
    raw_history = pdr.get_data_yahoo(ticker, start="2019-01-01", end=None)
    return pd.DataFrame(raw_history).reset_index()
38
 
39
 
40
-
41
 
42
def forecasting(df):
    """Forecast the next 3 periods of ``'Adj Close'`` with Nixtla's TimeGPT.

    The API key is read from the ``NIXTLA_API_KEY`` environment variable so
    that no credential is stored in source control.

    Args:
        df: DataFrame containing a ``'Date'`` column and an ``'Adj Close'``
            column (these names are passed as ``time_col`` / ``target_col``).

    Returns:
        Whatever ``NixtlaClient.forecast`` returns for the request.

    Raises:
        RuntimeError: If ``NIXTLA_API_KEY`` is unset or empty.
    """
    import os

    api_key = os.environ.get("NIXTLA_API_KEY")
    if not api_key:
        raise RuntimeError(
            "NIXTLA_API_KEY environment variable is not set; "
            "cannot authenticate with the Nixtla API"
        )

    nixtla_client = NixtlaClient(api_key=api_key)
    # Forecast horizon: 3 steps at month-start frequency.
    # NOTE(review): the input history is presumably daily while freq='MS'
    # declares month-start data -- confirm the intended frequency.
    timegpt_fcst_df = nixtla_client.forecast(
        df=df,
        h=3,
        freq='MS',
        finetune_steps=10,
        time_col='Date',
        target_col='Adj Close',
    )
    return timegpt_fcst_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
 
 
 
 
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
 
54
@app.get("/ticker/{ticker}")
def read_item(ticker: str):
    """Return a forecast for ``ticker`` as a JSON string.

    Downloads the price history with ``get_data_from_yahoo``, passes it to
    ``forecasting`` and serialises the resulting frame with
    ``to_json(orient="split")``.
    """
    history = get_data_from_yahoo(ticker)
    forecast_frame = forecasting(history)
    return forecast_frame.to_json(orient="split")
60
 
61
 
 
17
  from nixtlats import TimeGPT
18
  from nixtlats import NixtlaClient
19
 
20
+ import numpy as np
21
+ import seaborn as sns
22
+ import sklearn.metrics as metrics
23
+ from sklearn.model_selection import train_test_split
24
+ from sklearn.preprocessing import OneHotEncoder
25
+ from sklearn.metrics import mean_absolute_error
26
+ from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
27
+ from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
28
+ from sklearn.preprocessing import OneHotEncoder, LabelEncoder
29
+ import xgboost as xgb
30
+
31
+
32
  app = FastAPI()
33
 
34
 
 
39
 
40
  }
41
 
42
+ # XGBoost
43
+
44
def data_download(ticker: str):
    """Download adjusted closes for ``ticker`` plus volatility and yield indices.

    The requested symbol is fetched together with a fixed set of CBOE
    volatility indices and Treasury-yield indices, gaps are forward-filled,
    values are rounded to 2 decimals and the index symbols are renamed to
    plain column names.

    Args:
        ticker: Symbol to download alongside the fixed index list.

    Returns:
        A DataFrame indexed by ``'Date'`` holding one column per downloaded
        series plus ``'DDate'``, ``'Day'``, ``'Month'`` and ``'Year'``.

    Raises:
        ValueError: If ``ticker`` is empty or only whitespace.
    """
    symbol = ticker.strip() if ticker else ""
    if not symbol:
        raise ValueError("ticker must be a non-empty symbol")

    # Use the caller's symbol (not a module-level name) as the first series.
    index_list = [
        symbol, '^VIX', '^VVIX', '^VIX9D', '^VIX3M', '^VIX6M', '^FVX', '^TNX', '^TYX'
    ]

    # NOTE(review): some yfinance releases only expose 'Adj Close' when
    # auto_adjust=False -- confirm against the pinned yfinance version.
    data = yf.download(index_list, start="1994-01-01", end=None)['Adj Close']
    # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
    data = data.ffill()
    df = data.reset_index().round(2)
    df = df.rename(columns={
        '^VIX': 'VIX', '^VVIX': 'VIX_Index',
        '^VIX9D': 'VIX9D', '^VIX3M': 'VIX3M', '^VIX6M': 'VIX6M',
        '^FVX': 'T5Y', '^TNX': 'T10Y', '^TYX': 'T30Y'
    })

    # Parse the dates once and derive the calendar columns from that result.
    dates = pd.to_datetime(df['Date'])
    df['DDate'] = df['Date']
    df['Day'] = dates.dt.day
    df['Month'] = dates.dt.month
    df['Year'] = dates.dt.year
    df = df.set_index('Date')
    return df
64
 
65
 
 
66
 
67
def _infer_ticker_column(df):
    """Return the single column of ``df`` that is not a known index/date column."""
    known = {
        'VIX', 'VIX_Index', 'VIX9D', 'VIX3M', 'VIX6M',
        'T5Y', 'T10Y', 'T30Y',
        'Date', 'DDate', 'Day', 'Month', 'Year',
    }
    candidates = [col for col in df.columns if col not in known]
    if len(candidates) != 1:
        raise ValueError(
            "cannot infer the ticker column; expected exactly one unknown "
            f"column but found {candidates!r}"
        )
    return candidates[0]


def data_manipolation(df, ticker=None):
    """Build the model feature table from the downloaded price frame.

    For each of ``T5Y``, ``T10Y``, ``T30Y`` and ``VIX`` the result keeps the
    raw value plus four derived columns:

    * ``<col>_p``      -- day-over-day difference, rounded to 2 decimals
    * ``<col>_c1``     -- that difference over the previous value, in percent
    * ``<col>_MA30``   -- 30-row rolling mean, rounded to 2 decimals
    * ``<col>/_MA30``  -- value divided by its rolling mean, rounded to 4

    The target column ``<ticker>_c1`` is appended last. Only the columns that
    are returned are computed.

    Args:
        df: Frame containing the four basic columns and the ticker column.
            It is not modified; the work happens on a copy.
        ticker: Name of the ticker column. When omitted it is inferred as the
            only column that is not one of the known index/date columns.

    Returns:
        A new DataFrame with the columns above, with rows containing missing
        or infinite values removed.

    Raises:
        ValueError: If ``ticker`` is omitted and cannot be inferred.
        KeyError: If a required column is absent from ``df``.
    """
    suffixes = ['_p', '_c1', '_MA30', '/_MA30']
    basic_cols = ['T5Y', 'T10Y', 'T30Y', 'VIX']
    if ticker is None:
        ticker = _infer_ticker_column(df)

    work = df.copy()

    for col in basic_cols:
        series = work[col]
        moving_avg = series.rolling(window=30).mean().round(2)
        daily_change = series.diff().round(2)
        work[col + "_MA30"] = moving_avg
        work[col + "/_MA30"] = (series / moving_avg).round(4)
        work[col + "_p"] = daily_change
        work[col + "_c1"] = (daily_change / series.shift()).round(4) * 100

    target = work[ticker]
    work[ticker + "_c1"] = (target.diff().round(2) / target.shift()).round(4) * 100

    to_keep = basic_cols + [f"{col}{suffix}" for col in basic_cols for suffix in suffixes]
    to_keep.append(ticker + "_c1")

    # A zero previous value yields +/-inf, which dropna() alone would keep;
    # turn those into NaN so the affected rows are removed as well.
    selected = work[to_keep].replace([float("inf"), float("-inf")], float("nan"))
    return selected.dropna()
98
 
99
def _infer_target_column(df):
    """Return the single ``*_c1`` column that is not a basic-feature change column."""
    feature_changes = {base + '_c1' for base in ('T5Y', 'T10Y', 'T30Y', 'VIX')}
    candidates = [
        col for col in df.columns
        if isinstance(col, str) and col.endswith('_c1') and col not in feature_changes
    ]
    if len(candidates) != 1:
        raise ValueError(
            "cannot infer the target column; expected exactly one non-feature "
            f"'*_c1' column but found {candidates!r}"
        )
    return candidates[0]


def data_split_train_test(df, target_col=None, test_fraction=0.3, gap=30):
    """Split ``df`` chronologically into train and test features/targets.

    The last ``int(test_fraction * len(df))`` rows form the test window and
    everything before it is the training set. The first ``gap`` rows of the
    test window are then discarded, leaving a buffer between the two sets.

    Args:
        df: Feature table that includes the target column, ordered oldest
            row first.
        target_col: Name of the target column. When omitted it is inferred as
            the only ``*_c1`` column not belonging to T5Y/T10Y/T30Y/VIX.
        test_fraction: Share of rows reserved for the test window, in [0, 1].
        gap: Number of leading test-window rows to drop (non-negative).

    Returns:
        ``(Xtrain, ytrain, Xtest, ytest)``.

    Raises:
        ValueError: If the target cannot be inferred, or if ``test_fraction``
            or ``gap`` is out of range.
    """
    if not 0 <= test_fraction <= 1:
        raise ValueError("test_fraction must be between 0 and 1")
    if gap < 0:
        raise ValueError("gap must be non-negative")
    if target_col is None:
        target_col = _infer_target_column(df)

    X = df.loc[:, df.columns != target_col]
    y = df[target_col]

    recent_data_size = int(test_fraction * len(X))
    split_at = len(X) - recent_data_size

    Xtrain = X.iloc[:split_at]
    ytrain = y.iloc[:split_at]

    # Skip the first `gap` rows of the most recent window.
    Xtest = X.iloc[split_at + gap:]
    ytest = y.iloc[split_at + gap:]

    return Xtrain, ytrain, Xtest, ytest
114
+
115
def xgb_training_forecast(Xtrain, ytrain, Xtest, ytest):
    """Fit an XGBoost regressor and predict from the last row of ``Xtest``.

    Args:
        Xtrain: Training features.
        ytrain: Training target.
        Xtest: Test features; only the final row is used as model input.
        ytest: Test target. Accepted for interface symmetry but not used.

    Returns:
        The scalar prediction for the final row of ``Xtest``.

    Raises:
        ValueError: If ``Xtrain`` or ``Xtest`` has no rows.
    """
    # Fail early with a clear message instead of an opaque indexing error.
    if len(Xtrain) == 0:
        raise ValueError("Xtrain is empty; not enough data to train the model")
    if len(Xtest) == 0:
        raise ValueError("Xtest is empty; no recent row available to forecast from")

    reg = xgb.XGBRegressor(
        base_score=0.5,
        booster='gbtree',
        n_estimators=1000,
        # 'reg:linear' is a deprecated alias of squared-error regression.
        objective='reg:squarederror',
        max_depth=3,
        learning_rate=0.01,
    )
    model = reg.fit(Xtrain, ytrain)

    # Double brackets keep a one-row frame, so the feature names stay aligned
    # with the ones seen during fit.
    latest_row = Xtest.iloc[[-1]]
    prediction = model.predict(latest_row)[0]

    return prediction
132
 
133
 
134
 
135
@app.get("/ticker/{ticker}")
def read_item(ticker: str):
    """Train on fresh data for ``ticker`` and return the model's forecast.

    Each request downloads the data, builds the feature table, splits it,
    fits the XGBoost model and predicts from the most recent test row.

    Args:
        ticker: Symbol taken from the URL path.

    Returns:
        A JSON-serialisable dict with the requested ``ticker`` and the
        ``forecast`` value rounded to 2 decimals.
    """
    df = data_download(ticker)
    df = data_manipolation(df)
    df = df.round(2)
    Xtrain, ytrain, Xtest, ytest = data_split_train_test(df)
    forecast_value = xgb_training_forecast(Xtrain, ytrain, Xtest, ytest)

    # The prediction is a NumPy scalar, which has no ``to_json`` method and is
    # not JSON-serialisable as-is; convert it to a plain float first.
    result = {"ticker": ticker, "forecast": round(float(forecast_value), 2)}
    return result
144
 
145