SadikHasanKC committed on
Commit
d54ac74
1 Parent(s): 5a2c6e6
Files changed (1) hide show
  1. notebooks/SML/app.py +98 -80
notebooks/SML/app.py CHANGED
@@ -1,94 +1,112 @@
 
 
 
1
  from datetime import datetime, timedelta
2
- import joblib
3
  import pandas as pd
 
4
  import numpy as np
5
- import plotly.express as px
6
- from matplotlib import pyplot
7
- import warnings
8
- import os
9
- import hopsworks
10
- from dotenv import load_dotenv
11
- load_dotenv()
12
-
13
- import streamlit as st
14
-
15
- import folium
16
- from streamlit_folium import st_folium
17
- import json
18
 
19
- import asyncio
20
- import nest_asyncio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Apply nest_asyncio to the current event loop
23
- nest_asyncio.apply(asyncio.get_event_loop())
 
 
 
 
 
 
 
 
 
 
24
 
25
- start_date = datetime.now() - timedelta(hours=48)
26
- end_date = datetime.now() - timedelta(hours=24)
 
27
 
 
 
 
28
 
29
- warnings.filterwarnings("ignore")
 
 
30
 
31
- api_key = os.getenv('HOPSWORKS_API_KEY')
32
- project = hopsworks.login(api_key_value=api_key)
33
- fs = project.get_feature_store()
34
 
35
- @st.cache_data()
36
- def retrieve_dataset(_fv, start_date, end_date):
37
- st.write(36 * "-")
38
- print_fancy_header('\n💾 Dataset Retrieving...')
39
- batch_data = fv.get_batch_data(start_time = start_date, end_time = end_date)
40
- return batch_data
41
 
 
 
42
 
43
- @st.cache_data()
44
- def get_feature_view():
45
- fv = fs.get_feature_view("tesla_stocks_fv", 5)
46
- return fv
47
 
 
 
48
 
49
- @st.cache_data()
50
- def get_model(_project = project):
51
- mr = project.get_model_registry()
52
- model = mr.get_model("stock_pred_model", version = 10)
53
- model_dir = model.download()
54
- return joblib.load(model_dir + "/stock_prediction_model.pkl")
55
- #
56
- #
57
- def print_fancy_header(text, font_size=24):
58
- res = f'<span style="color:#ff5f27; font-size: {font_size}px;">{text}</span>'
59
- st.markdown(res, unsafe_allow_html=True)
60
- #
61
- #def transform_preds(predictions):
62
- # return ['Fraud' if pred == 1 else 'Not Fraud' for pred in predictions]
63
-
64
- progress_bar = st.sidebar.header('⚙️ Working Progress')
65
- progress_bar = st.sidebar.progress(0)
66
- #st.title('🆘 Fraud transactions detection 🆘')
67
-
68
- #st.write(36 * "-")
69
- #print_fancy_header('\n📡 Connecting to Hopsworks Feature Store...')
70
-
71
- #st.write(36 * "-")
72
- #print_fancy_header('\n🤖 Connecting to Model Registry on Hopsworks...')
73
- model = get_model(project)
74
- st.write(model)
75
- st.write("✅ Connected!")
76
-
77
- progress_bar.progress(40)
78
-
79
- st.write(36 * "-")
80
- print_fancy_header('\n✨ Fetch batch data and predict')
81
- fv = get_feature_view()
82
-
83
-
84
- if st.button('📊 Make a prediction'):
85
- batch_data = retrieve_dataset(_fv, start_date, end_date)
86
- st.write("✅ Retrieved!")
87
- #progress_bar.progress(55)
88
- #predictions = model.predict(batch_data)
89
- #predictions = transform_preds(predictions)
90
- #batch_data_to_explore = batch_data.copy()
91
- #batch_data_to_explore['fraud'] = predictions
92
- #explore_data(batch_data_to_explore)
93
-
94
- st.button("Re-run")
 
1
+ import streamlit as st
2
+ import os
3
+ import hopsworks
4
  from datetime import datetime, timedelta
 
5
  import pandas as pd
6
+ from sklearn.preprocessing import OneHotEncoder
7
  import numpy as np
8
+ import joblib
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
def login_hopsworks(api_key):
    """Log in to Hopsworks with the given API key and return the project handle."""
    return hopsworks.login(api_key_value=api_key)
13
+
14
def get_feature_data(fs, start_date, end_date, fv_name='tesla_stocks_fv',
                     fv_version=3, training_dataset_version=1):
    """Fetch the batch of feature rows between ``start_date`` and ``end_date``.

    Args:
        fs: Feature store handle; must provide ``get_feature_view``.
        start_date: Lower bound passed to ``get_batch_data`` as ``start_time``.
        end_date: Upper bound passed to ``get_batch_data`` as ``end_time``.
        fv_name: Name of the feature view to read.
        fv_version: Version of the feature view to read.
        training_dataset_version: Version passed to ``init_batch_scoring``.

    Returns:
        Whatever ``feature_view.get_batch_data`` returns for the window.

    On any failure the error is shown with ``st.error`` and the script run is
    halted with ``st.stop()``.
    """
    try:
        # The lookup and the scoring initialisation can fail just like the
        # fetch itself, so they live inside the same guarded region and are
        # reported to the user the same way.
        feature_view = fs.get_feature_view(fv_name, fv_version)
        feature_view.init_batch_scoring(
            training_dataset_version=training_dataset_version
        )
        return feature_view.get_batch_data(
            start_time=start_date, end_time=end_date
        )
    except Exception as e:  # UI boundary: report the problem, then halt.
        st.error(f"Error fetching batch data: {e}")
        st.stop()
24
+
25
def preprocess_data(df):
    """One-hot encode ``ticker`` and split ``date`` into year/month/day columns.

    Args:
        df: DataFrame with a ``ticker`` column and a ``date`` column. The
            caller's frame is not modified.

    Returns:
        A ``(frame, encoder)`` tuple: the transformed frame (without
        ``ticker`` and ``date``) and the fitted ``OneHotEncoder``, which is
        needed later to map the one-hot columns back to ticker labels.
    """
    # NOTE(review): the encoder is fitted on the batch itself, so the set and
    # order of ``ticker_*`` columns depend on which tickers are present here.
    # Confirm this matches the layout the model was trained on.
    encoder = OneHotEncoder()
    encoded = encoder.fit_transform(df[['ticker']])
    encoded_df = pd.DataFrame(
        encoded.toarray(),
        columns=encoder.get_feature_names_out(['ticker']),
        # Reuse the input index: concat(axis=1) aligns on index labels, and a
        # fresh RangeIndex would produce NaN-padded rows for any other index.
        index=df.index,
    )
    df = pd.concat([df.drop(columns=['ticker']), encoded_df], axis=1)

    # Coerce first so string/object dates work as well as datetime64 columns.
    dates = pd.to_datetime(df['date'])
    df['year'] = dates.dt.year
    df['month'] = dates.dt.month
    df['day'] = dates.dt.day
    df = df.drop(columns=['date'])

    return df, encoder
39
+
40
def load_model(mr, name="stock_pred_model", version=3,
               filename="stock_prediction_model.pkl"):
    """Download a registered model and load it from disk.

    Args:
        mr: Model registry handle; must provide ``get_model``.
        name: Registered model name.
        version: Registered model version.
        filename: File inside the downloaded model directory to load.

    Returns:
        The object deserialised by ``joblib.load``.
    """
    registered = mr.get_model(name, version=version)
    model_dir = registered.download()
    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only load artifacts from a registry you trust.
    return joblib.load(os.path.join(model_dir, filename))
45
+
46
def make_predictions(model, df):
    """Run ``model`` over every row of ``df`` and store the scaled output.

    Each row is fed to the model as one sample with an extra axis inserted at
    position 1. The first output value of each sample, multiplied by 100, is
    written to a new ``predictions`` column.

    Args:
        model: Object exposing ``predict(array)``.
        df: Feature frame. It is modified in place (the ``predictions``
            column is added) and also returned.

    Returns:
        The same ``df`` object with the ``predictions`` column added.

    Raises:
        ValueError: If the model output cannot be arranged as one row of
            values per input row.
    """
    n_rows = len(df)
    if n_rows == 0:
        # Nothing to score; avoid calling the model on an empty array.
        df['predictions'] = np.empty(0, dtype=np.float32)
        return df

    features = np.expand_dims(df.to_numpy(), axis=1)
    raw = np.asarray(model.predict(features), dtype=np.float32)
    # One prediction per input row. Indexing ``[0][0]`` here would keep only
    # the first sample and either broadcast it to all rows or fail on length.
    per_row = raw.reshape(n_rows, -1)[:, 0] * 100
    df['predictions'] = per_row.tolist()
    return df
54
+
55
def reconstruct_date_column(df):
    """Rebuild ``date`` from the ``year``/``month``/``day`` columns.

    The frame is modified in place: ``date`` is added and the three component
    columns are removed. The same frame object is returned.
    """
    parts = ['year', 'month', 'day']
    rebuilt = pd.to_datetime(df[parts])
    df['date'] = rebuilt
    df.drop(columns=parts, inplace=True)
    return df
59
+
60
def inverse_transform_tickers(df, encoder):
    """Replace the one-hot ``ticker_*`` columns with a single ``ticker`` column.

    Args:
        df: Frame containing the one-hot columns (any column whose name
            contains ``ticker_``).
        encoder: Fitted encoder exposing ``inverse_transform``; it receives
            the one-hot columns in the order they appear in ``df``.

    Returns:
        A new frame with the one-hot columns removed and a ``ticker`` column
        appended. The row index of ``df`` is preserved.
    """
    one_hot = df.filter(like='ticker_')
    labels = encoder.inverse_transform(one_hot.to_numpy())
    # Reuse df's index so concat(axis=1) lines rows up one-to-one instead of
    # aligning against a fresh RangeIndex and padding with NaN.
    ticker_df = pd.DataFrame(labels, columns=['ticker'], index=df.index)
    return pd.concat([df.drop(columns=one_hot.columns), ticker_df], axis=1)
67
 
68
def print_fancy_header(text, font_size=24):
    """Render ``text`` as an orange header in the Streamlit page.

    Args:
        text: Header text. It is inserted into the HTML as-is, so it should
            come from trusted code rather than user input.
        font_size: Font size in pixels.
    """
    res = f'<span style="color:#ff5f27; font-size: {font_size}px;">{text}</span>'
    # Without this call the function only builds the markup and discards it.
    st.markdown(res, unsafe_allow_html=True)
70
+
71
def main():
    """Streamlit entry point: fetch features, predict, and show results per ticker.

    Steps: log in to Hopsworks using the ``hopsworks_api`` environment
    variable, read the batch from 48 to 24 hours ago, preprocess it, score it
    with the registered model, restore the ``date`` and ``ticker`` columns,
    and display the rows for the ticker chosen in a select box.
    """
    st.title("Stock Predictions")
    st.write("Predictions for stocks:")

    # Initialize Hopsworks
    api_key = os.environ.get('hopsworks_api')
    project = login_hopsworks(api_key)
    fs = project.get_feature_store()
    mr = project.get_model_registry()

    # Define date range from a single clock reading so the window is exactly
    # 24 hours wide.
    now = datetime.now()
    start_date = now - timedelta(hours=48)
    end_date = now - timedelta(hours=24)

    # Fetch and preprocess feature data
    tesla_df_b = get_feature_data(fs, start_date, end_date)
    if tesla_df_b is None or tesla_df_b.empty:
        # The encoder and the model cannot run on zero rows.
        st.warning("No feature data is available for the selected time window.")
        return
    tesla_df_b, encoder = preprocess_data(tesla_df_b)

    # Load the model and make predictions
    model = load_model(mr)
    tesla_df_b = make_predictions(model, tesla_df_b)

    # Reconstruct the date column and inverse transform tickers
    tesla_df_b = reconstruct_date_column(tesla_df_b)
    tesla_df_b = inverse_transform_tickers(tesla_df_b, encoder)

    # Offer each ticker once; the raw column repeats a ticker for every row.
    ticker_options = tesla_df_b['ticker'].unique().tolist()
    selected_ticker = st.selectbox('Select Ticker', ticker_options)

    # Filter the DataFrame based on the selected ticker and display it
    filtered_df = tesla_df_b[tesla_df_b['ticker'] == selected_ticker]
    st.dataframe(filtered_df)

    # Additional information; keep in sync with the version load_model loads.
    st.write("Model used: stock_pred_model version 3")


if __name__ == "__main__":
    main()