fin_proj_docker_2

Runtime error

App Files Files Community

OfirMatzlawi committed on Apr 30

Commit

47ce08f

•

1 Parent(s): 5b90a23

Update main.py

Browse files

Files changed (1) hide show

main.py +102 -18

main.py CHANGED Viewed

@@ -17,6 +17,18 @@ from pandas_datareader import data as pdr
 from nixtlats import TimeGPT
 from nixtlats import NixtlaClient
 app = FastAPI()
@@ -27,35 +39,107 @@ def read_root():
     }
-## TimeGPT
-def get_data_from_yahoo(ticker: str):
-    # Fetch price history for `ticker` via pandas_datareader's Yahoo reader,
-    # from 2019-01-01 with no end date, and return it as a DataFrame whose
-    # index has been reset into a regular column.
-    # NOTE(review): yf.pdr_override() is presumably yfinance's hook that makes
-    # pdr.get_data_yahoo go through yfinance; `yf` is imported outside this view.
-    yf.pdr_override()
-    data = pdr.get_data_yahoo(ticker, start="2019-01-01", end=None)
-    df = pd.DataFrame(data).reset_index()
     return df
-def forecasting(df):
-    """Forecast the next 3 monthly values of 'Adj Close' with the Nixtla client.
-
-    ``df`` must contain a 'Date' column (time axis) and an 'Adj Close' column
-    (target). Returns whatever ``NixtlaClient.forecast`` returns for a horizon
-    of 3 steps at month-start frequency with 10 fine-tuning steps.
-
-    Raises KeyError when the NIXTLA_API_KEY environment variable is not set.
-    """
-    import os
-    # The key is read from the environment: a credential committed to source
-    # control is exposed to everyone who can read the repository and must be
-    # treated as compromised (rotate the previously hard-coded key).
-    api_key = os.environ['NIXTLA_API_KEY']
-    nixtla_client = NixtlaClient(api_key = api_key)
-## forecasting 3 months
-    timegpt_fcst_df = nixtla_client.forecast(df=df, h=3, freq='MS', finetune_steps=10, time_col='Date', target_col='Adj Close')
-    return timegpt_fcst_df
 @app.get("/ticker/{ticker}")
 def read_item(ticker: str):
-    # Previous implementation: download the Yahoo history for `ticker`, run the
-    # forecasting() helper on it, and serialize the resulting frame to a JSON
-    # string in pandas "split" orientation.
-    data = get_data_from_yahoo(ticker)
-    pred = forecasting(data)
-    result = pred.to_json(orient="split")
     return result

 from nixtlats import TimeGPT
 from nixtlats import NixtlaClient
+import numpy as np
+import seaborn as sns
+import sklearn.metrics as metrics
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.metrics import mean_absolute_error
+from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
+from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
+from sklearn.preprocessing import OneHotEncoder, LabelEncoder
+import xgboost as xgb
 app = FastAPI()
     }
+# XGBoost
+def data_download(ticker: str):
+    """Download the 'Adj Close' history for ``ticker`` and the index symbols used as features.
+
+    Data starts at 1994-01-01 with no end date. Gaps are forward-filled, values
+    are rounded to 2 decimals, and the '^'-prefixed index symbols are renamed to
+    plain column names (VIX, VIX_Index, VIX9D, VIX3M, VIX6M, T5Y, T10Y, T30Y).
+
+    Returns a DataFrame indexed by 'Date' that also carries 'DDate', 'Day',
+    'Month' and 'Year' columns derived from the date.
+    """
+    # Use the ``ticker`` argument; the previous code referenced an undefined
+    # name ``Ticker`` here, which raised NameError on every request.
+    index_list = [
+        ticker, '^VIX', '^VVIX', '^VIX9D', '^VIX3M', '^VIX6M', '^FVX', '^TNX', '^TYX'
+    ]
+    data = yf.download(index_list, start="1994-01-01", end=None)['Adj Close']
+    # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
+    data = data.ffill()
+    df = data.reset_index().round(2)
+    df = df.rename(columns={
+        '^VIX': 'VIX', '^VVIX': 'VIX_Index',
+        '^VIX9D': 'VIX9D', '^VIX3M': 'VIX3M', '^VIX6M': 'VIX6M',
+        '^FVX': 'T5Y', '^TNX': 'T10Y', '^TYX': 'T30Y'
+    })
+    df['DDate'] = df['Date']
+    # Parse the dates once instead of once per derived column.
+    dates = pd.to_datetime(df['DDate'])
+    df['Day'] = dates.dt.day
+    df['Month'] = dates.dt.month
+    df['Year'] = dates.dt.year
+    df = df.set_index('Date')
     return df
+def _resolve_ticker(ticker=None):
+    """Return ``ticker``, or a legacy module-level ``Ticker`` when none is passed."""
+    if ticker is not None:
+        return ticker
+    # Backward compatibility for callers that still use the one-argument form
+    # and rely on a module-level ``Ticker`` being defined.
+    legacy = globals().get('Ticker')
+    if legacy is None:
+        raise ValueError("ticker must be provided")
+    return legacy
+def data_manipolation(df, ticker=None):
+    """Derive moving-average and daily-change features and keep the model columns.
+
+    ``df`` is the frame produced by ``data_download``; ``ticker`` names its
+    price column. Columns are added to ``df`` in place before filtering.
+
+    Returns a new DataFrame holding T5Y, T10Y, T30Y and VIX, their '_p', '_c1',
+    '_MA30' and '/_MA30' variants, and '<ticker>_c1', with incomplete rows dropped.
+    """
+    ticker = _resolve_ticker(ticker)
+    # 30-row rolling mean and the ratio of each series to that mean.
+    New_Names = [ticker, 'VIX', 'VIX_Index', 'VIX9D', 'VIX3M', 'VIX6M', 'T5Y', 'T10Y', 'T30Y']
+    for col in New_Names:
+        df[col + "_MA30"] = df[col].rolling(window=30).mean().round(2)
+        df[col + "/_MA30"] = (df[col] / df[col + "_MA30"]).round(4)
+    # Every numeric column is treated as a time series, provided the frame has
+    # at least 2 rows (a difference needs a previous row).
+    numeric_cols = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
+    timeseries_cols = numeric_cols if len(df) > 1 else []
+    for col in timeseries_cols:
+        daily_change = df[col].diff().round(2)
+        df[col + "_p"] = daily_change  # absolute change versus the previous row
+        df[col + "_c1"] = (daily_change / df[col].shift()).round(4) * 100  # percentage change
+    suffixes = ['_p', '_c1', '_MA30', '/_MA30']
+    basic_cols = ['T5Y', 'T10Y', 'T30Y', 'VIX']
+    to_keep = basic_cols + [f"{col}{suffix}" for col in basic_cols for suffix in suffixes]
+    to_keep.append(ticker + '_c1')
+    # Keep only the model columns and drop rows with missing values (the first
+    # rows have no rolling mean or previous value).
+    df = df[to_keep].dropna()
+    return df
+def data_split_train_test(df, ticker=None):
+    """Split ``df`` chronologically into train and test parts.
+
+    The target is '<ticker>_c1'; every other column is a feature. The last 30%
+    of rows form the test window, and its first 30 rows are then discarded.
+
+    Returns ``(Xtrain, ytrain, Xtest, ytest)``.
+    """
+    target = _resolve_ticker(ticker) + '_c1'
+    X = df.loc[:, df.columns != target]
+    y = df[target]
+    recent_data_size = int(0.3 * len(X))  # Adjust the percentage as needed
+    train_size = len(X) - recent_data_size
+    Xtrain = X.head(train_size)
+    ytrain = y.head(train_size)
+    # NOTE(review): the 30-row gap presumably keeps the 30-row rolling windows
+    # of the first test rows from overlapping the training rows; confirm.
+    Xtest = X.tail(recent_data_size).iloc[30:]
+    ytest = y.tail(recent_data_size).iloc[30:]
+    return Xtrain, ytrain, Xtest, ytest
+def xgb_training_forecast(Xtrain, ytrain, Xtest, ytest):
+    """Fit an XGBoost regressor on the training data and predict for the last test row.
+
+    ``ytest`` is accepted for interface compatibility but is not used.
+
+    Returns the prediction as a NumPy scalar. Raises ValueError when ``Xtest``
+    has no rows.
+    """
+    if len(Xtest) == 0:
+        raise ValueError("Xtest is empty; not enough data to forecast")
+    # 'reg:squarederror' is the current name of the objective formerly called
+    # 'reg:linear', which recent XGBoost releases reject.
+    reg = xgb.XGBRegressor(base_score=0.5, booster='gbtree',
+                       n_estimators=1000,
+                       objective='reg:squarederror',
+                       max_depth=3,
+                       learning_rate=0.01)
+    model = reg.fit(Xtrain, ytrain)
+    # Double brackets keep the row as a one-row DataFrame, so the feature names
+    # seen at fit time are still attached at predict time.
+    X_init = Xtest.iloc[[-1]]
+    prediction = model.predict(X_init)[0]
+    return prediction
 @app.get("/ticker/{ticker}")
 def read_item(ticker: str):
+    """Return the model's predicted 1-day percentage change for ``ticker``.
+
+    Responds with ``{"ticker": ..., "forecast": ...}``, or with HTTP 404 when
+    there is not enough usable history to train and predict.
+    """
+    from fastapi import HTTPException
+    # NOTE(review): yfinance is expected to label columns with upper-case
+    # symbols, so normalise once here to keep the column lookups consistent.
+    ticker = ticker.upper()
+    df = data_download(ticker)
+    df = data_manipolation(df, ticker)
+    df = df.round(2)
+    Xtrain, ytrain, Xtest, ytest = data_split_train_test(df, ticker)
+    if len(Xtrain) == 0 or len(Xtest) == 0:
+        raise HTTPException(status_code=404, detail=f"Not enough data for ticker {ticker}")
+    forecast_value = xgb_training_forecast(Xtrain, ytrain, Xtest, ytest)
+    # The prediction is a NumPy scalar, which has no to_json(); convert it to a
+    # plain float so FastAPI can serialise the response.
+    result = {"ticker": ticker, "forecast": round(float(forecast_value), 2)}
     return result