rbgo committed on
Commit
57a5582
·
1 Parent(s): 1cccfdb

all project files

Browse files
Files changed (5) hide show
  1. all_model.py +23 -0
  2. app.py +88 -0
  3. dataset/reliance_30min.csv +0 -0
  4. helper.py +238 -0
  5. model/xgb_f_beta_model.sav +0 -0
all_model.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import numpy as np
3
+ import xgboost
4
+ # from xgboost import XGBClassifier
5
+ # import xgboost as xgb
6
+
7
+ """Input: NULL
8
+ Output: Model
9
+ """
10
def load_model():
    """Load the pickled gap-up classifier shipped with the project.

    Returns:
        The object stored in ``model/xgb_f_beta_model.sav`` (path is
        relative to the current working directory).

    Raises:
        OSError: if the model file cannot be opened.
    """
    # NOTE: pickle executes arbitrary code on load; only ever point this at
    # the model file bundled with the project, never at user-supplied data.
    # The context manager closes the file handle even if unpickling fails.
    with open('model/xgb_f_beta_model.sav', 'rb') as model_file:
        return pickle.load(model_file)
14
+
15
+
16
+ """ Input: Model, Selected_date Data
17
+ Output: Predicted Score
18
+ """
19
def prediction(model, data):
    """Return the mean positive-class probability over the rows of *data*.

    Args:
        model: fitted estimator exposing ``predict_proba``.
        data: feature rows for the selected date.

    Returns:
        The average of every probability column except the first one, or
        NaN when *data* contains no rows.
    """
    # Nothing to score: report NaN explicitly instead of averaging an empty
    # array (which warns) or handing the estimator an empty matrix.
    if len(data) == 0:
        return float('nan')

    pred = model.predict_proba(data)
    # Column 0 is skipped; the remaining column(s) are averaged over all rows.
    return np.average(pred[:, 1:])
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import datetime
4
+ import numpy as np
5
+ import datetime
6
+ import helper
7
+ import all_model
8
+
9
def show_information():
    """Render the static "did you know" facts about the selected stock."""
    st.header('🤫Did you know💡')
    st.caption("Analyzing data from 2015 to 2021")

    facts = (
        "1) There is a 60% chance of gap up opening in any random trade in Reliance 😮 ",
        "2) 1% of the gap up is more than Rs:15.00 i.e more quantity == more profit😇",
        "3) Median, Q3 or 75th percentile have increased from 2015(1.8) to 2021(11.55)💰",
    )
    # One st.text call per fact, in the listed order.
    for fact in facts:
        st.text(fact)
16
+
17
def select_date():
    """Ask the user for the date to predict from and echo it back.

    Returns:
        The value returned by the Streamlit date picker.
    """
    default_day = datetime.date(2022, 3, 6)
    chosen = st.date_input("Which date you want to check", default_day)
    st.write('Your selected date is:', chosen)
    return chosen
25
+
26
@st.cache
def prepare_data_for_selected_date():
    """Load the 30-minute CSV and run it through the helper pipeline.

    Returns:
        The frame produced by ``helper.format_date``, then
        ``helper.replace_vol``, then ``helper.feature_main``.
    """
    raw = pd.read_csv("dataset/reliance_30min.csv")
    # Same three steps, applied in the same order, expressed as a pipe chain.
    return (
        raw.pipe(helper.format_date)
        .pipe(helper.replace_vol)
        .pipe(helper.feature_main)
    )
34
+
35
def freature_data(df, date):
    """Select the feature rows for one day and drop the raw 'date' column.

    Args:
        df: feature frame indexed by timestamp, containing a 'date' column.
        date: the day to select; ``str(date)`` is used as the index label.

    Returns:
        The rows of *df* matching *date* without the 'date' column. When the
        date cannot be looked up, an empty frame with the same columns is
        returned instead of raising ``KeyError``.
    """
    try:
        day_rows = df.loc[str(date)]
    except KeyError:
        # The label lookup failed (e.g. a date outside the loaded data):
        # hand back zero rows so the caller can report "no data".
        day_rows = df.iloc[0:0]

    return day_rows.drop(columns=['date'])
42
+
43
+
44
def show_prediction_result(prepared_data):
    """Load the saved model and score *prepared_data* with it.

    Returns:
        Whatever ``all_model.prediction`` returns for the loaded model and
        the given rows.
    """
    return all_model.prediction(all_model.load_model(), prepared_data)
49
+
50
+
51
+
52
def main():
    """Render the page: pick a stock, pick a date, show the gap-up estimate.

    Only 'Reliance' has data behind it; any other choice shows a
    "not available" notice. For Reliance, the features of the selected date
    are scored and the result is reported for the following calendar day.
    """
    st.title('PROFIT IN THE MORNING!')
    option = st.selectbox(
        'Which stock would you like to analyze?',
        ('None', 'Reliance', 'Airtel', 'State Bank Of India'))

    st.write('You selected:', option)

    if option != "Reliance":
        # NOTE(review): the original indentation was lost; this fallback is
        # assumed to pair with the stock selection, not the score checks.
        st.text('Data Not Avaliable!')
        return

    # About Reliance Stock
    show_information()
    selected_date = select_date()
    prepared_data = prepare_data_for_selected_date()
    try:
        day_features = freature_data(prepared_data, selected_date)
    except KeyError:
        # The selected date is not present in the loaded data.
        day_features = None

    if day_features is None or len(day_features) == 0:
        score = float('nan')
    else:
        score = show_prediction_result(day_features)
    st.write('')

    # NaN never compares equal to anything (and certainly not to the string
    # 'nan'), so it has to be detected with isnan.
    if np.isnan(score):
        st.warning(f'No data avaliable for the selected date {selected_date}')
        return

    # The estimate is reported for the day after the selected one.
    prediction_date = selected_date + datetime.timedelta(days=1)
    text = f'The chances of Gap up on: {prediction_date} is {score:.2%}'
    if score >= 0.5:
        st.success(text)
    else:
        st.error(text)


if __name__ == "__main__":
    main()
dataset/reliance_30min.csv ADDED
The diff for this file is too large to render. See raw diff
 
helper.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import talib as ta
3
+ import numpy as np
4
+
5
def format_date(df):
    """Return *df* re-indexed by the timestamp built from 'date' and 'time'.

    Args:
        df: frame with string columns 'date' (``YYYY-MM-DD``) and 'time'
            (``HH:MM:SS``).

    Returns:
        A new frame with a ``DatetimeIndex`` named 'Datetime'. The 'date' and
        'time' columns are kept and the input frame is left unmodified.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    stamps = pd.to_datetime(df['date'] + ' ' + df['time'], format=timestamp_format)
    # Build the index directly rather than via a temporary 'Datetime' column,
    # so the caller's frame does not gain a stray column.
    return df.set_index(pd.DatetimeIndex(stamps, name='Datetime'))
12
+
13
+ # https://stackoverflow.com/questions/39684548/convert-the-string-2-90k-to-2900-or-5-2m-to-5200000-in-pandas-dataframe
14
def replace_vol(df):
    """Convert abbreviated volumes such as '2.90K' or '5.2M' to plain numbers.

    Args:
        df: frame with a 'volume' column. Values may be strings with an
            optional trailing 'K' (thousand) or 'M' (million) suffix, or
            already numeric.

    Returns:
        The same frame, with 'volume' replaced by float values. A column that
        is already numeric is left untouched.

    Raises:
        ValueError: if a value carries a suffix other than a single 'K'/'M'
            or its numeric part cannot be parsed.
    """
    volume = df['volume']
    if pd.api.types.is_numeric_dtype(volume):
        # Nothing to expand; the .str accessor below would fail on numbers.
        return df

    multipliers = {'K': 10**3, 'M': 10**6}
    text = volume.astype(str)
    suffix = text.str.extract(r'[\d\.]+([KM]+)', expand=False)
    factor = suffix.map(multipliers)

    # A suffix was found but is not a known unit (e.g. 'KM').
    unknown = suffix.notna() & factor.isna()
    if unknown.any():
        raise ValueError(f"unsupported volume suffix in {text[unknown].tolist()}")

    number = text.str.replace(r'[KM]+$', '', regex=True).astype(float)
    # Values without a suffix are multiplied by 1.
    df['volume'] = number * factor.fillna(1)
    return df
20
+
21
def get_all_features(df):
    """Add TA-Lib indicator columns to *df* and return it.

    Reads the ``open``, ``high``, ``low``, ``close`` and ``volume`` columns
    and writes every indicator back into the same frame as a new column, so
    the frame passed in is modified. Columns are added in statement order.

    NOTE(review): the saved model presumably expects exactly this column
    order and these parameters — confirm before reordering or retuning.
    """
    #get_overlap_studies
    # BBANDS - Bollinger Bands
    df['bbub'], df['bbmb'], df['bblb'] = ta.BBANDS(df['close'])

    # DEMA - Double Exponential Moving Average
    df['DEMA_100'] = ta.DEMA(df['close'],timeperiod=100)
    df['DEMA_30'] = ta.DEMA(df['close'],timeperiod=30)
    df['DEMA_5'] = ta.DEMA(df['close'],timeperiod=5)

    # EMA - Exponential Moving Average
    df['EMA_100'] = ta.EMA(df['close'],timeperiod=100)
    df['EMA_30'] = ta.EMA(df['close'],timeperiod=30)
    df['EMA_5'] = ta.EMA(df['close'],timeperiod=5)

    # HT_TRENDLINE - Hilbert Transform - Instantaneous Trendline
    df['HT_TRENDLINE'] = ta.HT_TRENDLINE(df['close'])

    # KAMA - Kaufman Adaptive Moving Average
    df['KAMA'] = ta.KAMA(df['close'])

    # MA - Moving average
    df['MA_100'] = ta.MA(df['close'],timeperiod=100)
    df['MA_30'] = ta.MA(df['close'],timeperiod=30)
    df['MA_5'] = ta.MA(df['close'],timeperiod=5)

    # MAMA - MESA Adaptive Moving Average
    df['MAMA'], df['FAMA'] = ta.MAMA(df['close'])

    # MIDPOINT - MidPoint over period
    df['MIDPOINT'] = ta.MIDPOINT(df['close'])

    # MIDPRICE - Midpoint Price over period
    df['MIDPRICE'] = ta.MIDPRICE(df.high, df.low, timeperiod=14)

    # SAR - Parabolic SAR
    # NOTE(review): acceleration and maximum are both passed as 0 — confirm intended.
    df['SAR'] = ta.SAR(df.high, df.low, acceleration=0, maximum=0)

    # SAREXT - Parabolic SAR - Extended
    # NOTE(review): every tuning parameter is passed as 0 — confirm intended.
    df['SAREXT'] = ta.SAREXT(df.high, df.low, startvalue=0, offsetonreverse=0, accelerationinitlong=0, accelerationlong=0, accelerationmaxlong=0, accelerationinitshort=0, accelerationshort=0, accelerationmaxshort=0)

    # SMA - Simple Moving Average
    df['SMA_100'] = ta.SMA(df['close'],timeperiod=100)
    df['SMA_30'] = ta.SMA(df['close'],timeperiod=30)
    df['SMA_5'] = ta.SMA(df['close'],timeperiod=5)

    # T3 - Triple Exponential Moving Average (T3)
    df['T3'] = ta.T3(df.close, timeperiod=5, vfactor=0)

    # TEMA - Triple Exponential Moving Average
    df['TEMA_100'] = ta.TEMA(df['close'],timeperiod=100)
    df['TEMA_30'] = ta.TEMA(df['close'],timeperiod=30)
    df['TEMA_5'] = ta.TEMA(df['close'],timeperiod=5)

    # TRIMA - Triangular Moving Average
    df['TRIMA_100'] = ta.TRIMA(df['close'],timeperiod=100)
    df['TRIMA_30'] = ta.TRIMA(df['close'],timeperiod=30)
    df['TRIMA_5'] = ta.TRIMA(df['close'],timeperiod=5)

    # WMA - Weighted Moving Average
    df['WMA_100'] = ta.WMA(df['close'],timeperiod=100)
    df['WMA_30'] = ta.WMA(df['close'],timeperiod=30)
    df['WMA_5'] = ta.WMA(df['close'],timeperiod=5)


    #get_momentum_indicator
    # ADX - Average Directional Movement Index
    df['ADX'] = ta.ADX(df.high, df.low, df.close, timeperiod=14)

    # ADXR - Average Directional Movement Index Rating
    df['ADXR'] = ta.ADXR(df.high, df.low, df.close, timeperiod=14)

    # APO - Absolute Price Oscillator
    df['APO'] = ta.APO(df.close, fastperiod=12, slowperiod=26, matype=0)

    # AROON - Aroon (outputs are unpacked as down first, then up)
    df['AROON_DWN'],df['AROON_UP'] = ta.AROON(df.high, df.low, timeperiod=14)

    # AROONOSC - Aroon Oscillator
    df['AROONOSC'] = ta.AROONOSC(df.high, df.low, timeperiod=14)

    # BOP - Balance Of Power
    df['BOP'] = ta.BOP(df.open, df.high, df.low, df.close)

    # CCI - Commodity Channel Index
    df['CCI'] = ta.CCI(df.high, df.low, df.close, timeperiod=14)

    # CMO - Chande Momentum Oscillator
    df['CMO']= ta.CMO(df.close, timeperiod=14)

    # DX - Directional Movement Index
    df['DX'] = ta.DX(df.high, df.low, df.close, timeperiod=14)

    # MACD - Moving Average Convergence/Divergence
    df['MACD'], df['MACD_SGNL'], df['MACD_HIST'] = ta.MACD(df.close, fastperiod=12, slowperiod=26, signalperiod=9)

    # MACDFIX - Moving Average Convergence/Divergence Fix 12/26
    df['MACDF'], df['MACDF_SGNL'], df['MACDF_HIST'] = ta.MACDFIX(df.close)

    # MFI - Money Flow Index
    df['MFI'] = ta.MFI(df.high, df.low, df.close, df.volume, timeperiod=14)

    # MINUS_DI - Minus Directional Indicator
    df['MINUS_DI'] = ta.MINUS_DI(df.high, df.low, df.close, timeperiod=14)

    # MINUS_DM - Minus Directional Movement
    df['MINUS_DM'] = ta.MINUS_DM(df.high, df.low, timeperiod=14)

    # MOM - Momentum
    df['MOM'] = ta.MOM(df.close, timeperiod=10)

    # PLUS_DI - Plus Directional Indicator
    df['PLUS_DI'] = ta.PLUS_DI(df.high, df.low, df.close, timeperiod=14)

    # PLUS_DM - Plus Directional Movement
    df['PLUS_DM'] = ta.PLUS_DM(df.high, df.low, timeperiod=14)

    # PPO - Percentage Price Oscillator
    df['PPO'] = ta.PPO(df.close, fastperiod=12, slowperiod=26, matype=0)

    # ROC - Rate of change : ((price/prevPrice)-1)*100
    df['ROC'] = ta.ROC(df.close, timeperiod=10)

    # ROCP - Rate of change Percentage: (price-prevPrice)/prevPrice
    df['ROCP'] = ta.ROCP(df.close, timeperiod=10)

    # ROCR - Rate of change ratio: (price/prevPrice)
    df['ROCR'] = ta.ROCR(df.close, timeperiod=10)

    # ROCR100 - Rate of change ratio 100 scale: (price/prevPrice)*100
    df['ROCR100'] = ta.ROCR100(df.close, timeperiod=10)

    # RSI - Relative Strength Index
    df['RSI'] = ta.RSI(df.close, timeperiod=14)

    # STOCH - Stochastic
    df['STOCH_SLWK'], df['STOCH_SLWD'] = ta.STOCH(df.high, df.low, df.close, fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)

    # STOCHF - Stochastic Fast
    df['STOCH_FSTK'], df['STOCH_FSTD'] = ta.STOCHF(df.high, df.low, df.close, fastk_period=5, fastd_period=3, fastd_matype=0)

    # STOCHRSI - Stochastic Relative Strength Index
    df['STOCHRSI_FSTK'], df['STOCHRSI_FSTD'] = ta.STOCHRSI(df.close, timeperiod=14, fastk_period=5, fastd_period=3, fastd_matype=0)

    # TRIX - 1-day Rate-Of-Change (ROC) of a Triple Smooth EMA
    df['TRIX'] = ta.TRIX(df.close, timeperiod=30)

    # ULTOSC - Ultimate Oscillator
    df['ULTOSC'] = ta.ULTOSC(df.high, df.low, df.close, timeperiod1=7, timeperiod2=14, timeperiod3=28)

    # WILLR - Williams' %R
    df['WILLR'] = ta.WILLR(df.high, df.low, df.close, timeperiod=14)


    # get_volume_indicator
    # AD - Chaikin A/D Line
    df['AD'] = ta.AD(df.high, df.low, df.close, df.volume)

    # ADOSC - Chaikin A/D Oscillator
    df['ADOSC'] = ta.ADOSC(df.high, df.low, df.close, df.volume, fastperiod=3, slowperiod=10)

    # OBV - On Balance Volume
    df['OBV'] = ta.OBV(df.close, df.volume)


    # get_volatility_indicator
    # ATR - Average True Range
    df['ATR'] = ta.ATR(df.high, df.low, df.close, timeperiod=14)

    # NATR - Normalized Average True Range
    df['NATR'] = ta.NATR(df.high, df.low, df.close, timeperiod=14)

    # TRANGE - True Range
    df['TRANGE'] = ta.TRANGE(df.high, df.low, df.close)


    # get_transform_price
    # AVGPRICE - Average Price
    df['AVGPRICE'] = ta.AVGPRICE(df.open, df.high, df.low, df.close)

    # MEDPRICE - Median Price
    df['MEDPRICE'] = ta.MEDPRICE(df.high, df.low)

    # TYPPRICE - Typical Price
    df['TYPPRICE'] = ta.TYPPRICE(df.high, df.low, df.close)

    # WCLPRICE - Weighted Close Price
    df['WCLPRICE'] = ta.WCLPRICE(df.high, df.low, df.close)


    # get_cycle_indicator
    # HT_DCPERIOD - Hilbert Transform - Dominant Cycle Period
    df['HT_DCPERIOD'] = ta.HT_DCPERIOD(df.close)

    # HT_DCPHASE - Hilbert Transform - Dominant Cycle Phase
    df['HT_DCPHASE'] = ta.HT_DCPHASE(df.close)

    # HT_PHASOR - Hilbert Transform - Phasor Components
    df['HT_PHASOR_IP'], df['HT_PHASOR_QD'] = ta.HT_PHASOR(df.close)

    # HT_SINE - Hilbert Transform - SineWave
    df['HT_SINE'], df['HT_SINE_LEADSINE'] = ta.HT_SINE(df.close)

    # HT_TRENDMODE - Hilbert Transform - Trend vs Cycle Mode
    df['HT_TRENDMODE'] = ta.HT_TRENDMODE(df.close)

    return df
228
+
229
def feature_main(df):
    """Build the full feature frame: numeric time, indicators, NaN filling.

    Args:
        df: frame with a 'time' column of ``HH:MM:SS`` strings plus the
            price/volume columns that ``get_all_features`` reads.

    Returns:
        The frame with indicator columns added and every missing value
        replaced by the first non-missing value of its column. A column with
        no valid value at all is left as is.
    """
    # 'time' becomes the integer sum of its colon-separated parts
    # (e.g. "09:15:00" -> 9 + 15 + 0 = 24).
    df['time'] = df['time'].map(lambda x: np.sum(list(map(int, str(x).split(':')))))

    df = get_all_features(df)

    values = {}
    for col in df.columns:
        column = df[col]
        valid = column.notna().to_numpy()
        # An all-NaN column has no fill value; skipping it avoids indexing
        # with None.
        if valid.any():
            # argmax on a boolean mask gives the position of the first True.
            values[col] = column.iloc[int(valid.argmax())]

    return df.fillna(value=values)
model/xgb_f_beta_model.sav ADDED
Binary file (143 kB). View file