amgross01, IoannisTr committed on
Commit
5b4694b
0 Parent(s):

Duplicate from IoannisTr/Tech_Stocks_Trading_Assistant


Co-authored-by: Ioannis Triantafyllakis <IoannisTr@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,31 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.npy filter=lfs diff=lfs merge=lfs -text
13
+ *.npz filter=lfs diff=lfs merge=lfs -text
14
+ *.onnx filter=lfs diff=lfs merge=lfs -text
15
+ *.ot filter=lfs diff=lfs merge=lfs -text
16
+ *.parquet filter=lfs diff=lfs merge=lfs -text
17
+ *.pickle filter=lfs diff=lfs merge=lfs -text
18
+ *.pkl filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pt filter=lfs diff=lfs merge=lfs -text
21
+ *.pth filter=lfs diff=lfs merge=lfs -text
22
+ *.rar filter=lfs diff=lfs merge=lfs -text
23
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
24
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
25
+ *.tflite filter=lfs diff=lfs merge=lfs -text
26
+ *.tgz filter=lfs diff=lfs merge=lfs -text
27
+ *.wasm filter=lfs diff=lfs merge=lfs -text
28
+ *.xz filter=lfs diff=lfs merge=lfs -text
29
+ *.zip filter=lfs diff=lfs merge=lfs -text
30
+ *.zst filter=lfs diff=lfs merge=lfs -text
31
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
1
+ [theme]
2
+ primaryColor = "#181818"
3
+ backgroundColor = "#212121"
4
+ secondaryBackgroundColor = "#3D3D3D"
5
+ textColor = "#E4E6EB"
6
+ font = "sans serif"
FinBERT_training.py ADDED
@@ -0,0 +1,82 @@
1
+ import os
2
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
3
+ os.environ['WANDB_DISABLED'] = "true"
4
+ import pandas as pd
5
+ from sklearn.preprocessing import LabelEncoder
6
+ from sklearn.model_selection import train_test_split
7
+ from transformers import (
8
+ AutoTokenizer,
9
+ DataCollatorWithPadding,
10
+ TrainingArguments,
11
+ Trainer,
12
+ AutoModelForSequenceClassification
13
+ )
14
+ from datasets import Dataset
15
+
16
+ #######################################
17
+ ########## FinBERT training ###########
18
+ #######################################
19
+
20
+ class args:
21
+ model = 'ProsusAI/finbert'
22
+
23
+ df = pd.read_csv('all-data.csv',
24
+ names = ['labels','messages'],
25
+ encoding='ISO-8859-1')
26
+
27
+ df = df[['messages', 'labels']]
28
+
29
+ le = LabelEncoder()
30
+ df['labels'] = le.fit_transform(df['labels'])
31
+
32
+ X, y = df['messages'].values, df['labels'].values
33
+
34
+ xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.1)
35
+ xtrain, xvalid, ytrain, yvalid = train_test_split(xtrain, ytrain, test_size=0.2)
36
+
37
+ train_dataset_raw = Dataset.from_dict({'text':xtrain, 'labels':ytrain})
38
+ valid_dataset_raw = Dataset.from_dict({'text':xvalid, 'labels':yvalid})
39
+
40
+ tokenizer = AutoTokenizer.from_pretrained(args.model)
41
+
42
+ def tokenize_fn(examples):
43
+ return tokenizer(examples['text'], truncation=True)
44
+
45
+ train_dataset = train_dataset_raw.map(tokenize_fn, batched=True)
46
+ valid_dataset = valid_dataset_raw.map(tokenize_fn, batched=True)
47
+
48
+ data_collator = DataCollatorWithPadding(tokenizer)
49
+
50
+ model = AutoModelForSequenceClassification.from_pretrained(args.model)
51
+
52
+ train_args = TrainingArguments(
53
+ './Finbert Trained/',
54
+ per_device_train_batch_size=16,
55
+ per_device_eval_batch_size=2*16,
56
+ num_train_epochs=5,
57
+ learning_rate=2e-5,
58
+ weight_decay=0.01,
59
+ warmup_ratio=0.1,
60
+ do_eval=True,
61
+ do_train=True,
62
+ do_predict=True,
63
+ evaluation_strategy='epoch',
64
+ save_strategy="no",
65
+ )
66
+
67
+ trainer = Trainer(
68
+ model,
69
+ train_args,
70
+ train_dataset=train_dataset,
71
+ eval_dataset=valid_dataset,
72
+ data_collator=data_collator,
73
+ tokenizer=tokenizer
74
+ )
75
+
76
+ trainer.train()
77
+
78
+ # saving the model and the weights
79
+ model.save_pretrained('fine_tuned_FinBERT')
80
+ # saving the tokenizer
81
+ tokenizer.save_pretrained("fine_tuned_FinBERT/tokenizer/")
82
+
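Note: the script above writes the fine-tuned weights to fine_tuned_FinBERT/ and the tokenizer to fine_tuned_FinBERT/tokenizer/, which is exactly where functions.py reloads them from. A minimal reload-and-predict sketch, assuming those paths and the standard transformers text-classification pipeline (the sample headline is only an illustration):

# Sketch: reloading the artifacts saved by FinBERT_training.py for inference
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

model = AutoModelForSequenceClassification.from_pretrained("fine_tuned_FinBERT")
tokenizer = AutoTokenizer.from_pretrained("fine_tuned_FinBERT/tokenizer/")
clf = pipeline("text-classification", model=model, tokenizer=tokenizer)

print(clf(["Quarterly revenue beat expectations."]))  # e.g. [{'label': 'positive', 'score': ...}]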
README.md ADDED
@@ -0,0 +1,13 @@
1
+ ---
2
+ title: Tech Stocks Trading Assistant
3
+ emoji: 💵
4
+ colorFrom: red
5
+ colorTo: purple
6
+ sdk: streamlit
7
+ sdk_version: 1.10.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: IoannisTr/Tech_Stocks_Trading_Assistant
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,91 @@
1
+ from stocks import *
2
+ from functions import *
3
+ from datetime import datetime
4
+ import streamlit as st
5
+
6
+ st.set_page_config(layout="wide")
7
+
8
+ st.title("Tech Stocks Trading Assistant")
9
+
10
+ left_column, right_column = st.columns(2)
11
+
12
+ with left_column:
13
+
14
+ all_tickers = {
15
+ "Apple":"AAPL",
16
+ "Microsoft":"MSFT",
17
+ "Nvidia":"NVDA",
18
+ "Paypal":"PYPL",
19
+ "Amazon":"AMZN",
20
+ "Spotify":"SPOT",
21
+ #"Twitter":"TWTR",
22
+ "AirBnB":"ABNB",
23
+ "Uber":"UBER",
24
+ "Google":"GOOG"
25
+ }
26
+
27
+ st.subheader("Technical Analysis Methods")
28
+ option_name = st.selectbox('Choose a stock:', all_tickers.keys())
29
+ option_ticker = all_tickers[option_name]
30
+ execution_timestamp = datetime.now()
31
+ 'You selected: ', option_name, "(",option_ticker,")"
32
+ 'Last execution:', execution_timestamp
33
+
34
+ s = Stock_Data()
35
+ t = s.Ticker(tick=option_ticker)
36
+
37
+ m = Models()
38
+
39
+ with st.spinner('Loading stock data...'):
40
+
41
+ technical_analysis_methods_outputs = {
42
+ 'Technical Analysis Method': [
43
+ 'Bollinger Bands (20 days & 2 stand. deviations)',
44
+ 'Bollinger Bands (10 days & 1.5 stand. deviations)',
45
+ 'Bollinger Bands (50 days & 3 stand. deviations)',
46
+ 'Moving Average Convergence Divergence (MACD)'
47
+ ],
48
+ 'Outlook': [
49
+ m.bollinger_bands_20d_2std(t),
50
+ m.bollinger_bands_10d_1point5std(t),
51
+ m.bollinger_bands_50d_3std(t),
52
+ m.MACD(t)
53
+ ],
54
+ 'Timeframe of Method': [
55
+ "Medium-term",
56
+ "Short-term",
57
+ "Long-term",
58
+ "Short-term"
59
+ ]
60
+ }
61
+
62
+ df = pd.DataFrame(technical_analysis_methods_outputs)
63
+
64
+
65
+ def color_survived(val):
66
+ color = ""
67
+ if (val=="Sell" or val=="Downtrend and sell signal" or val=="Downtrend and no signal"):
68
+ color="#EE3B3B"
69
+ elif (val=="Buy" or val=="Uptrend and buy signal" or val=="Uptrend and no signal"):
70
+ color="#3D9140"
71
+ else:
72
+ color="#CD950C"
73
+ return f'background-color: {color}'
74
+
75
+
76
+ st.table(df.sort_values(['Timeframe of Method'], ascending=False).
77
+ reset_index(drop=True).style.applymap(color_survived, subset=['Outlook']))
78
+
79
+ with right_column:
80
+
81
+ st.subheader("FinBERT-based Sentiment Analysis")
82
+
83
+ with st.spinner("Connecting with www.marketwatch.com..."):
84
+ st.plotly_chart(m.finbert_headlines_sentiment(t)["fig"])
85
+ "Current sentiment:", m.finbert_headlines_sentiment(t)["current_sentiment"], "%"
86
+
87
+ st.subheader("LSTM-based 7-day stock price prediction model")
88
+
89
+ with st.spinner("Compiling LSTM model.."):
90
+ st.plotly_chart(m.LSTM_7_days_price_predictor(t))
91
+
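A note on app.py: the bare expressions such as 'You selected: ', option_name and 'Last execution:', execution_timestamp rely on Streamlit's "magic", which renders any bare expression via st.write; likewise, pd is available here only through the star imports from stocks and functions, both of which import pandas as pd. A hypothetical explicit equivalent of those two lines:

st.write('You selected: ', option_name, "(", option_ticker, ")")
st.write('Last execution:', execution_timestamp)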
fine_tuned_FinBERT/config.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "_name_or_path": "ProsusAI/finbert",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "positive",
14
+ "1": "negative",
15
+ "2": "neutral"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "negative": 1,
21
+ "neutral": 2,
22
+ "positive": 0
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "position_embedding_type": "absolute",
31
+ "problem_type": "single_label_classification",
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.19.4",
34
+ "type_vocab_size": 2,
35
+ "use_cache": true,
36
+ "vocab_size": 30522
37
+ }
fine_tuned_FinBERT/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cf94068479cc76e50e49b3046457a411e3616f5c2daa430cdc8e62a6374cff7
3
+ size 438006381
fine_tuned_FinBERT/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1 @@
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
fine_tuned_FinBERT/tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
fine_tuned_FinBERT/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1 @@
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "ProsusAI/finbert", "special_tokens_map_file": "C:\\Users\\user/.cache\\huggingface\\transformers\\4c21e8896b03f68c2e028133cf579267c62aba9de03a704a0845704e58eefe9e.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
fine_tuned_FinBERT/tokenizer/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
functions.py ADDED
@@ -0,0 +1,482 @@
1
+ from asyncio.constants import LOG_THRESHOLD_FOR_CONNLOST_WRITES
2
+ import yfinance as yf
3
+ import pandas as pd
4
+ import numpy as np
5
+ import plotly.graph_objs as go
6
+ from stocks import *
7
+ from transformers import AutoModelForSequenceClassification, pipeline, AutoTokenizer
8
+ import os
9
+ from random import random
10
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
11
+ import tensorflow as tf
12
+ import math
13
+ import datetime
14
+ import random
15
+ import time
16
+ #import kaleido
17
+ from sklearn.preprocessing import MinMaxScaler
18
+ import matplotlib.pyplot as plt
19
+ #import warnings
20
+ import tensorflow as tf
21
+ from tensorflow import keras
22
+ from keras.layers import Dropout, Activation
23
+ from keras import layers
24
+ from keras.callbacks import EarlyStopping
25
+ from sklearn.metrics import r2_score
26
+ import plotly.graph_objs as go
27
+ import plotly.io as pio
28
+ pio.templates
29
+
30
+ model = AutoModelForSequenceClassification.from_pretrained("fine_tuned_FinBERT", from_tf=False, config="config.json")
31
+ tokenizer = AutoTokenizer.from_pretrained("fine_tuned_FinBERT/tokenizer/")
32
+
33
+ class Models(object):
34
+ def __init__(self):
35
+ self.stock_data = Stock_Data()
36
+
37
+ def bollinger_bands_20d_2std(self, ticker):
38
+ '''
39
+ This method calculates the Bollinger Bands with a Rolling average of the last 20 days and 2 standard deviations. In a plot,
40
+ this would be represented as 3 lines: a rolling average, an upper bound (rolling average + 2 standard deviations) and a lower
41
+ bound (rolling average - 2 standard deviations). When the price of a stock is between the rolling average and lower bound, it is
42
+ considered oversold, so it makes sense to buy; if it is between the rolling average and the upper bound, it is considered
43
+ overbought, so it makes sense to sell; if it is equal to the rolling average it is neutral; and if it is outside the bounds, it is
44
+ considered an Unusual Event. The function returns the outlook of the stock (either "Buy", or "Sell" or "Hold" or "Unusual Event")
45
+ '''
46
+ if self.stock_data.status_getter(ticker) != "Open":
47
+ return "Market Closed"
48
+ else:
49
+ data = self.stock_data.stock_data_getter(ticker)
50
+ low_high_closing_df = pd.DataFrame(data)
51
+ low_high_closing_df = data.iloc[:, 4:5] # Getting only the "Adj Close" column
52
+ low_high_closing_df = low_high_closing_df.tail(40) # Getting the last 40 days
53
+
54
+ low_high_closing_df["rolling_avg_20d"] = low_high_closing_df['Adj Close'].rolling(20, min_periods = 20).mean()
55
+ low_high_closing_df["sd"] = low_high_closing_df["Adj Close"].rolling(20, min_periods = 20).std()
56
+ low_high_closing_df = low_high_closing_df.tail(20) # Keeping the last 20 days only
57
+
58
+ recent_data = low_high_closing_df.iloc[-1, :].to_list() # Creating a Series object with the most recent data (last row only)
59
+
60
+ upper_bound = recent_data[1] + 2*recent_data[2] # Upper Bound
61
+ lower_bound = recent_data[1] - 2*recent_data[2] # Lower Bound
62
+ mean_20d = recent_data[1] # Rolling average of last 20 days
63
+
64
+ if self.stock_data.current_price_getter(ticker) is None:
65
+ return "Market Closed"
66
+ else:
67
+ message = ""
68
+
69
+ if self.stock_data.current_price_getter(ticker) < mean_20d and self.stock_data.current_price_getter(ticker) >= lower_bound:
70
+ message = "Buy"
71
+ elif self.stock_data.current_price_getter(ticker) > mean_20d and self.stock_data.current_price_getter(ticker) <= upper_bound:
72
+ message = "Sell"
73
+ elif self.stock_data.current_price_getter(ticker) == mean_20d:
74
+ message = "Hold"
75
+ elif self.stock_data.current_price_getter(ticker) <= lower_bound or self.stock_data.current_price_getter(ticker) >= upper_bound:
76
+ message = "Unusual Event"
77
+ return message
78
+
79
+ def bollinger_bands_10d_1point5std(self, ticker):
80
+ '''
81
+ This method calculates the Bollinger Bands with a Rolling average of the last 10 days and 1.5 standard deviations. In a plot,
82
+ this would be represented as 3 lines: a rolling average, an upper bound (rolling average + 1.5 standard deviations) and a lower
83
+ bound (rolling average - 1.5 standard deviations). When the price of a stock is between the rolling average and lower bound, it is
84
+ considered oversold, so it makes sense to buy; if it is between the rolling average and the upper bound, it is considered
85
+ overbought, so it makes sense to sell; if it is equal to the rolling average it is neutral; and if it is outside the bounds, it is
86
+ considered an Unusual Event. The function returns the outlook of the stock (either "Buy", or "Sell" or "Hold" or "Unusual Event")
87
+ '''
88
+ if self.stock_data.status_getter(ticker) != "Open":
89
+ return "Market Closed"
90
+ else:
91
+ data = self.stock_data.stock_data_getter(ticker)
92
+
93
+ low_high_closing_df = pd.DataFrame(data)
94
+ low_high_closing_df = data.iloc[:, 4:5] # Getting only the "Adj Close" column
95
+ low_high_closing_df = low_high_closing_df.tail(20) # Getting the last 20 days
96
+
97
+ low_high_closing_df["rolling_avg_10d"] = low_high_closing_df['Adj Close'].rolling(10, min_periods = 10).mean()
98
+ low_high_closing_df["sd"] = low_high_closing_df["Adj Close"].rolling(10, min_periods = 10).std()
99
+ low_high_closing_df = low_high_closing_df.tail(10) # Keeping the last 10 days only
100
+
101
+ recent_data = low_high_closing_df.iloc[-1, :].to_list() # Creating a Series object with the most recent data (last row only)
102
+
103
+ upper_bound = recent_data[1] + 1.5*recent_data[2] # Upper Bound
104
+ lower_bound = recent_data[1] - 1.5*recent_data[2] # Lower Bound
105
+ mean_10d = recent_data[1] # Rolling average of last 10 days
106
+
107
+ if self.stock_data.current_price_getter(ticker) is None:
108
+ return "Market Closed"
109
+ else:
110
+ message = ""
111
+
112
+ if self.stock_data.current_price_getter(ticker) < mean_10d and self.stock_data.current_price_getter(ticker) >= lower_bound:
113
+ message = "Buy"
114
+ elif self.stock_data.current_price_getter(ticker) > mean_10d and self.stock_data.current_price_getter(ticker) <= upper_bound:
115
+ message = "Sell"
116
+ elif self.stock_data.current_price_getter(ticker) == mean_10d:
117
+ message = "Hold"
118
+ elif self.stock_data.current_price_getter(ticker) <= lower_bound or self.stock_data.current_price_getter(ticker) >= upper_bound:
119
+ message = "Unusual Event"
120
+ return message
121
+
122
+ def bollinger_bands_50d_3std(self, ticker):
123
+ '''
124
+ This method calculates the Bollinger Bands with a Rolling average of the last 50 days and 3 standard deviations. In a plot,
125
+ this would be represented as 3 lines: a rolling average, an upper bound (rolling average + 3 standard deviations) and a lower
126
+ bound (rolling average - 3 standard deviations). When the price of a stock is between the rolling average and lower bound, it is
127
+ considered oversold, so it makes sense to buy; if it is between the rolling average and the upper bound, it is considered
128
+ overbought, so it makes sense to sell; if it is equal to the rolling average it is neutral; and if it is outside the bounds, it is
129
+ considered an Unusual Event. The function returns the outlook of the stock (either "Buy", or "Sell" or "Hold" or "Unusual Event")
130
+ '''
131
+ if self.stock_data.status_getter(ticker) != "Open":
132
+ return "Market Closed"
133
+ else:
134
+ data = self.stock_data.stock_data_getter(ticker)
135
+
136
+ low_high_closing_df = pd.DataFrame(data)
137
+ low_high_closing_df = data.iloc[:, 4:5] # Getting only the "Adj Close" column
138
+ low_high_closing_df = low_high_closing_df.tail(100) # Getting the last 100 days
139
+
140
+ low_high_closing_df["rolling_avg_50d"] = low_high_closing_df['Adj Close'].rolling(50, min_periods = 50).mean()
141
+ low_high_closing_df["sd"] = low_high_closing_df["Adj Close"].rolling(50, min_periods = 50).std()
142
+ low_high_closing_df = low_high_closing_df.tail(50) # Keeping the last 50 days only
143
+
144
+ recent_data = low_high_closing_df.iloc[-1, :].to_list() # Creating a Series object with the most recent data (last row only)
145
+
146
+ upper_bound = recent_data[1] + 3*recent_data[2] # Upper Bound
147
+ lower_bound = recent_data[1] - 3*recent_data[2] # Lower Bound
148
+ mean_50d = recent_data[1] # Rolling average of last 50 days
149
+
150
+ # Finding the outlook dependent on the current price
151
+ if self.stock_data.current_price_getter(ticker) is None:
152
+ return "Market Closed"
153
+ else:
154
+ message = ""
155
+ if self.stock_data.current_price_getter(ticker) < mean_50d and self.stock_data.current_price_getter(ticker) >= lower_bound:
156
+ message = "Buy"
157
+ elif self.stock_data.current_price_getter(ticker) > mean_50d and self.stock_data.current_price_getter(ticker) <= upper_bound:
158
+ message = "Sell"
159
+ elif self.stock_data.current_price_getter(ticker) == mean_50d:
160
+ message = "Hold"
161
+ elif self.stock_data.current_price_getter(ticker) <= lower_bound or self.stock_data.current_price_getter(ticker) >= upper_bound:
162
+ message = "Unusual Event"
163
+ return message
164
+
165
+ def MACD(self, ticker):
166
+ '''
167
+ This method calculates the MACD (Moving Average Convergence Divergence) for a stock. The decision of whether to buy or sell
168
+ a stock when using this method depends on the difference between two "lines". The 1st one is called "MACD" and is equal to the
169
+ difference between the 12-day Exponential Moving Average (EMA) of the adjusted closing price and the 26-day EMA
170
+ of the adjusted closing price. The 2nd line (the "signal" line) is the 9-day EMA of the MACD line itself.
171
+ When MACD > signal line, it is considered that there is an uptrend; otherwise, a downtrend.
172
+ Finally, when the MACD line crosses the signal line from "above", a "Sell" signal is given,
173
+ while when it crosses it from below, a "Buy" signal is given.
174
+ '''
175
+ if self.stock_data.status_getter(ticker) != "Open":
176
+ return "Market Closed"
177
+ else:
178
+ data = self.stock_data.stock_data_getter(ticker)
179
+
180
+ low_high_closing_df = pd.DataFrame(data)
181
+ low_high_closing_df = data.iloc[:, 4:5] # Getting only the "Adj Close" column
182
+ low_high_closing_df = low_high_closing_df.tail(52) # Getting the last 52 days
183
+
184
+
185
+ # Get the 12-day EMA of the closing price
186
+ low_high_closing_df['EMA_12d'] = low_high_closing_df['Adj Close'].ewm(span=12, adjust=False, min_periods=12).mean()
187
+ # Get the 26-day EMA of the closing price (stored below under the name MA_26d)
188
+ low_high_closing_df['MA_26d'] = low_high_closing_df['Adj Close'].ewm(span=26, adjust=False, min_periods=26).mean()
189
+ # Subtract the 26-day EMA from the 12-Day EMA to get the MACD
190
+ low_high_closing_df['MACD'] = low_high_closing_df['EMA_12d'] - low_high_closing_df['MA_26d']
191
+ # Making the signal line
192
+ low_high_closing_df['MA_9d'] = low_high_closing_df['MACD'].ewm(span=9, adjust=False, min_periods=9).mean()
193
+
194
+ low_high_closing_df['Diff'] = low_high_closing_df['MACD'] - low_high_closing_df['MA_9d']
195
+
196
+ Diff = low_high_closing_df['Diff'].astype(float)
197
+
198
+ if self.stock_data.current_price_getter(ticker) is None:
199
+ return "Market Closed"
200
+ else:
201
+ message = ""
202
+
203
+ if Diff.iloc[-1] < 0:
204
+ if Diff.iloc[-2] >= 0:
205
+ message = "Downtrend and sell signal"
206
+ else:
207
+ message = "Downtrend and no signal"
208
+ else:
209
+ if Diff.iloc[-2] <= 0:
210
+ message = "Uptrend and buy signal"
211
+ else:
212
+ message = "Uptrend and no signal"
213
+ return message
214
+
215
+ def finbert_headlines_sentiment(self, ticker):
216
+ '''
217
+ This method uses the "weights" and the "tokenizer" of a fine-tuned FinBERT model, which is a BERT model that
218
+ was further trained on financial data. The "article_parser()" method scrapes www.marketwatch.com and returns the
219
+ last 17 headlines of the chosen stock's articles. Then, the FinBERT model classifies each one of them as either "Positive",
220
+ "Negative" or "Neutral", and a score is assigned to each headline (+100, -100, and 0 respectively). Finally, a
221
+ rolling average of window size = 5 is used to "smooth" the sentiment line of the "plotly" plot that is returned.
222
+ '''
223
+
224
+ articles_df = self.stock_data.article_parser(ticker)
225
+ articles_list = articles_df["headline"].tolist()
226
+
227
+ clf = pipeline("text-classification", model=model, tokenizer=tokenizer)
228
+ outputs_list = clf(articles_list)
229
+
230
+ sentiments = []
231
+
232
+ for item in outputs_list:
233
+ sentiments.append(item["label"])
234
+
235
+ sentiments_df = pd.DataFrame(sentiments)
236
+ sentiments_df.rename(columns = {0:'sentiment'}, inplace = True)
237
+
238
+ sentiments_df["sentiment"] = sentiments_df["sentiment"].apply(lambda x: 100 if x == "positive" else -100 if x=="negative" else 0)
239
+ sentiments_df["roll_avg"] = round(sentiments_df["sentiment"].rolling(5, min_periods = 1).mean(), 2)
240
+ sentiments_df = sentiments_df.tail(12).reset_index()
241
+
242
+ pd.options.plotting.backend = "plotly"
243
+
244
+ fig = sentiments_df["roll_avg"].plot(title="Sentiment Analysis of the last 12 www.marketwatch.com articles about " + ticker,
245
+
246
+ template="plotly_dark",
247
+ labels=dict(index="12 most recent article headlines", value="sentiment score (rolling avg. of window size 5)"))
248
+ fig.update_traces(line=dict(color="#3D9140", width=3))
249
+ fig.update_layout(yaxis_range=[-100,100])
250
+ fig.update_layout(xaxis_range=[0,12])
251
+ fig.update_layout(showlegend=False)
252
+ fig.add_hline(y=0, line_width=1.5, line_color="black")
253
+
254
+ current_sentiment = sentiments_df["roll_avg"].tail(1).values[0]
255
+
256
+ return {'fig': fig, 'current_sentiment': current_sentiment}
257
+
258
+ def LSTM_7_days_price_predictor(self, ticker):
259
+ '''
260
+ This method predicts the price of a chosen stock for the next 7 days as of today, using the daily adjusted closing
261
+ prices of the last 2 years. First, each 60-day window of historical prices is used as feature data (x_train)
262
+ and the price on the following day as its label (y_train). For every stock available, we have manually defined different
263
+ hyperparameters so that the model fits the data as well as possible. Finally, we compute the R2 metric and make the
264
+ predictions: the model looks back 60 days into our data and predicts the following 7 days.
265
+ '''
266
+
267
+ stock_data = self.stock_data.LSTM_stock_data_getter(ticker)
268
+ stock_data=pd.DataFrame(data=stock_data).drop(['Open','High','Low','Close', 'Volume'],axis=1).reset_index()
269
+ stock_data['Date'] = pd.to_datetime(stock_data['Date'])
270
+ stock_data=stock_data.dropna()
271
+
272
+ # Data Preprocessing
273
+ random.seed(1997)
274
+ close_prices = stock_data['Adj Close']
275
+ values = close_prices.values
276
+ training_data_len = math.ceil(len(values)* 0.8)
277
+
278
+ scaler = MinMaxScaler(feature_range=(0,1))
279
+ scaled_data = scaler.fit_transform(values.reshape(-1,1))
280
+ train_data = scaled_data[0: training_data_len, :]
281
+
282
+ x_train = []
283
+ y_train = []
284
+
285
+ for i in range(60, len(train_data)):
286
+ x_train.append(train_data[i-60:i, 0])
287
+ y_train.append(train_data[i, 0])
288
+
289
+ x_train, y_train = np.array(x_train), np.array(y_train)
290
+ x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
291
+
292
+ # Preparation of test set
293
+ test_data = scaled_data[training_data_len-60: , : ]
294
+ x_test = []
295
+ y_test = values[training_data_len:]
296
+
297
+ for i in range(60, len(test_data)):
298
+ x_test.append(test_data[i-60:i, 0])
299
+
300
+ x_test = np.array(x_test)
301
+ x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
302
+
303
+ ##### Setting Up LSTM Network Architecture and the Training of the LSTM Model
304
+ def LSTM_trainer(seed, DROPOUT, LSTM_units,patience,batch_size, epochs):
305
+
306
+ tf.random.set_seed(seed)
307
+ DROPOUT = DROPOUT
308
+ global model_lstm
309
+ model_lstm = keras.Sequential()
310
+ model_lstm.add(layers.LSTM(LSTM_units, return_sequences=True, input_shape=(x_train.shape[1], 1)))
311
+ model_lstm.add(Dropout(rate=DROPOUT))
312
+ model_lstm.add(layers.LSTM(LSTM_units, return_sequences=False))
313
+ model_lstm.add(Dropout(rate=DROPOUT))
314
+ model_lstm.add(layers.Dense(25))
315
+ model_lstm.add(Dropout(rate=DROPOUT))
316
+ model_lstm.add(layers.Dense(1))
317
+ model_lstm.add(Activation('linear'))
318
+
319
+ print('\n')
320
+ print("Compiling the LSTM Model for the " + str(ticker) + " stock....\n")
321
+ t0 = time.time()
322
+ model_lstm.compile(optimizer='adam', loss='mean_squared_error',metrics=['mae'])
323
+ callback=EarlyStopping(monitor='val_loss',
324
+ min_delta=0,
325
+ patience=patience,
326
+ verbose=1, mode='auto')
327
+ model_lstm.fit(x_train,
328
+ y_train,
329
+ batch_size= batch_size,
330
+ epochs=epochs,
331
+ validation_split=0.1,# ...holding out 10% of the data for validation
332
+ shuffle=True,verbose=0,callbacks=[callback])
333
+ t1 = time.time()
334
+ global ex_time
335
+ ex_time = round(t1-t0, 2)
336
+ print("Compiling took :",ex_time,"seconds")
337
+
338
+ predictions = model_lstm.predict(x_test)
339
+ predictions = scaler.inverse_transform(predictions)
340
+ #rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))
341
+ global r_squared_score
342
+ global rmse
343
+ r_squared_score = round(r2_score(y_test, predictions),2)
344
+ rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))
345
+ #print('Rmse Score: ', round(rmse),2)
346
+ print('R2 Score: ', r_squared_score)
347
+
348
+ if ticker == 'AAPL':
349
+ LSTM_trainer(1, 0.2, 100,2, 20, 30)
350
+ elif ticker == 'NVDA':
351
+ LSTM_trainer(2, 0.2, 100,2, 30, 50)
352
+ elif ticker == 'PYPL':
353
+ LSTM_trainer(6, 0.2, 100,10,25, 30)
354
+ elif ticker == 'MSFT':
355
+ LSTM_trainer(4, 0.1, 80, 2,20, 40)
356
+ elif ticker == 'TSLA':
357
+ LSTM_trainer(5, 0.1, 120, 4,20, 25)
358
+ elif ticker == 'AMZN':
359
+ LSTM_trainer(6, 0.1, 120,2, 20, 25)
360
+ elif ticker == 'SPOT':
361
+ LSTM_trainer(9, 0.2, 200,5, 20, 40)
362
+ #elif ticker == 'TWTR' :
363
+ # LSTM_trainer(15, 0.2, 100,4,20, 40)
364
+ elif ticker == 'UBER':
365
+ LSTM_trainer(15, 0.2, 100,7,20, 40)
366
+ elif ticker == 'ABNB':
367
+ LSTM_trainer(15, 0.2, 120,8,20, 40)
368
+ elif ticker == 'GOOG':
369
+ LSTM_trainer(15, 0.2, 100,3,20, 25)
370
+
371
+ # Unseen Predictions for the next 7 days
372
+ close_data = scaled_data
373
+ look_back = 60
374
+
375
+ def predict(num_prediction, model):
376
+ prediction_list = close_data[-look_back:]
377
+
378
+ for _ in range(num_prediction):
379
+ x = prediction_list[-look_back:]
380
+ x = x.reshape((1, look_back, 1))
381
+
382
+ out = model.predict(x)[0][0]
383
+ prediction_list = np.append(prediction_list, out)
384
+ prediction_list = prediction_list[look_back-1:]
385
+
386
+ return prediction_list
387
+
388
+ def predict_dates(num_prediction):
389
+ last_date = stock_data['Date'].values[-1]
390
+ prediction_dates = pd.date_range(last_date, periods=num_prediction+1).tolist()
391
+ return prediction_dates
392
+
393
+ num_prediction = 7
394
+
395
+ forecast = predict(num_prediction, model_lstm)
396
+ forecast_dates = predict_dates(num_prediction)
397
+
398
+ plt.figure(figsize=(25,10))
399
+ forecast = forecast.reshape(-1, 1)
400
+ forecast_inverse = scaler.inverse_transform(forecast)
401
+
402
+ # Plotting the actual prices and the predictions for the next 7 days
403
+ base = stock_data['Date'].iloc[[-1]] # Here we create our base date (the last existing date with actual prices)
404
+ testdata = pd.DataFrame(forecast_inverse)# Here we create a data frame that contains the prediction prices and an empty column for their dates
405
+ testdata['Date'] = ""
406
+ testdata.columns = ["Adj Close","Date"]
407
+ testdata = testdata.iloc[1:,:]
408
+ testdata["Label"] = "" # Let's add a column "Label" that would show if the respective price is a prediction or not
409
+ testdata["Label"] = "Prediction"
410
+ testdata = testdata[["Date", "Adj Close", "Label"]]
411
+
412
+ date_list = [base + datetime.timedelta(days=x+1) for x in range(testdata.shape[0]+1)]
413
+ date_list = pd.DataFrame(date_list)
414
+ date_list.columns = ["Date"]
415
+ date_list.reset_index(inplace = True)
416
+ date_list.drop(["index"], axis = 1, inplace = True)
417
+ date_list.index = date_list.index + 1
418
+ testdata.Date = date_list
419
+
420
+ stock_data["Label"] = ""
421
+ stock_data["Label"] = "Actual price"
422
+ finaldf = pd.concat([stock_data,testdata], axis=0) # Here we concatenate the "testdata" and the original data frame "df" into a final one
423
+ finaldf.reset_index(inplace = True)
424
+ finaldf.drop(["index"], axis = 1, inplace = True)
425
+ finaldf['Date'] = pd.to_datetime(finaldf['Date'])
426
+
427
+ plt.rcParams["figure.figsize"] = (25,10)
428
+ #We create two different data frames, one that contains the actual prices and one that has only the predictions
429
+ finaldfPredictions = finaldf.iloc[-8:]
430
+ finaldfActuals = finaldf.iloc[:-7]
431
+
432
+ plot_1 = go.Scatter(
433
+ x = finaldfActuals['Date'],
434
+ y = finaldfActuals['Adj Close'],
435
+ mode = 'lines',
436
+ name = 'Historical Data (2 years)',
437
+ line=dict(width=1,color='#3D9140'))
438
+ plot_2 = go.Scatter(
439
+ x = finaldfPredictions['Date'],
440
+ y = finaldfPredictions['Adj Close'],
441
+ mode = 'lines',
442
+ name = '7-day Prediction',
443
+ line=dict(width=1,color="#EE3B3B"))
444
+ plot_3 = go.Scatter(
445
+ x = finaldfPredictions['Date'][:1],
446
+ y = finaldfPredictions['Adj Close'][:1],
447
+ mode = 'markers',
448
+ name = 'Latest Actual Closing Price',
449
+ line=dict(width=1))
450
+
451
+ layout = go.Layout(
452
+ title = 'Next 7 days stock price prediction of ' + str(ticker),
453
+ xaxis = {'title' : "Date"},
454
+ yaxis = {'title' : "Price ($)"}
455
+ )
456
+ fig = go.Figure(data=[plot_1, plot_2,plot_3], layout=layout)
457
+ fig.update_layout(template='plotly_dark',autosize=True)
458
+ fig.update_layout(legend=dict(
459
+ orientation="h",
460
+ yanchor="bottom",
461
+ y=1.02,
462
+ xanchor="right",
463
+ x=1),
464
+ annotations = [dict(x=0.5,
465
+ y=0,
466
+ xref='paper',
467
+ yref='paper',
468
+ text="Current In Sample R- Squared : " + str(r_squared_score*100) + " % \n",
469
+ showarrow = False)],
470
+ xaxis=dict(showgrid=False),
471
+ yaxis=dict(showgrid=False)
472
+
473
+
474
+ )
475
+ fig.add_annotation(x=0.5,
476
+ y=0.05,
477
+ xref='paper',
478
+ yref='paper',
479
+ text="Current In Sample Root Mean Square Error : " + str(round(rmse,2)) + " % ",
480
+ showarrow=False)
481
+
482
+ return fig
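The three bollinger_bands_* methods above all apply the same decision rule: compare the latest price against a rolling mean and mean ± k rolling standard deviations. A self-contained pandas sketch of that rule, with the window and k of the 20-day / 2-sigma variant as assumed defaults:

import pandas as pd

def bollinger_outlook(adj_close: pd.Series, current_price: float, window: int = 20, k: float = 2.0) -> str:
    # Rolling mean and standard deviation over the chosen window
    mean = adj_close.rolling(window).mean().iloc[-1]
    std = adj_close.rolling(window).std().iloc[-1]
    upper, lower = mean + k * std, mean - k * std
    if lower <= current_price < mean:
        return "Buy"            # between lower band and rolling mean: oversold
    if mean < current_price <= upper:
        return "Sell"           # between rolling mean and upper band: overbought
    if current_price == mean:
        return "Hold"
    return "Unusual Event"      # outside the bands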
requirements.txt ADDED
@@ -0,0 +1,8 @@
1
+ yfinance==0.1.70
2
+ streamlit==1.14.0
3
+ transformers
4
+ bs4
5
+ plotly==5.6.0
6
+ tensorflow==2.8.0
7
+ scikit_learn==1.2.0
8
+ torch
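With these pinned dependencies installed, the Space serves app.py (the app_file declared in the README front matter) through Streamlit; run locally, that would be roughly:

pip install -r requirements.txt
streamlit run app.py

Note that functions.py also imports matplotlib and stocks.py parses pages with the "lxml" backend; neither is pinned here, so they may need to be installed separately.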
stocks.py ADDED
@@ -0,0 +1,126 @@
1
+ from configparser import ParsingError
2
+ from logging import raiseExceptions
3
+ import yfinance as yf
4
+ import requests
5
+ import pandas as pd
6
+ from bs4 import BeautifulSoup
7
+
8
+ class Stock_Data(object):
9
+ '''
10
+ This class contains 6 methods responsible for choosing a stock's ticker, then checking whether the
11
+ stock exchange it is listed in is open or not, and in case it is, it gets data for the last 6 months
12
+ from "yfinance" module of Yahoo Inc. which will be fed to the models.
13
+ '''
14
+
15
+ def Ticker(self, tick):
16
+ '''
17
+ This method will "carry" the company's ticker, and it will also be used as a placeholder.
18
+ '''
19
+ global ticker
20
+ ticker = tick
21
+
22
+ return ticker
23
+
24
+
25
+ def status_getter(self, Ticker):
26
+ '''
27
+ This method gets the company ticker the user chooses, creates a www.marketwatch.com
28
+ link, then scrapes the HTML code of the corresponding company page on the MarketWatch website,
29
+ and gets the current market status of the exchange this stock is listed in. Possible values are:
30
+ After Hours, Open, and Market Closed.
31
+ '''
32
+ global company_ticker
33
+ company_ticker = Ticker
34
+ link_1 = 'https://www.marketwatch.com/investing/stock/'
35
+ link_2 = '?mod=search_symbol'
36
+ # Pasting the above 3 parts to create the URL
37
+ global final_link
38
+ final_link = link_1 + company_ticker + link_2
39
+
40
+ page = requests.get(final_link)
41
+ global soup
42
+ soup = BeautifulSoup(page.text, "lxml")
43
+ if soup is None:
44
+ raise ParsingError("HTML code of MarketWatch website was not scraped and current status can not be found")
45
+ else:
46
+ current_status = soup.find("div", class_="status").text # Finding the market status
47
+ return current_status
48
+
49
+
50
+ def current_price_getter(self, Ticker):
51
+ '''
52
+ This method will get the current price only if the market is open.
53
+ '''
54
+ current_price = None
55
+ if self.status_getter(Ticker) == "Open":
56
+ current_price = float(soup.find("bg-quote", class_="value").text.replace(',',''))
57
+ return current_price
58
+ else:
59
+ return "Market Closed"
60
+
61
+ def stock_data_getter(self, Ticker):
62
+ '''
63
+ This method will return a dataframe containing Stock data from the Yahoo's "yfinance"
64
+ library in case the market is open.
65
+ '''
66
+ if self.status_getter(Ticker) == "Open":
67
+ data = yf.download(tickers = str(Ticker), period = "6mo", interval = "1d")
68
+ df = pd.DataFrame(data)
69
+ return df
70
+ else:
71
+ return "Market Closed"
72
+
73
+ def LSTM_stock_data_getter(self, Ticker):
74
+ '''
75
+ This method will return a dataframe containing Stock data from the Yahoo's "yfinance"
76
+ library regardless of whether the market is open or not, and will feed the LSTM model.
77
+ '''
78
+ data = yf.download(tickers = str(Ticker), period = "2y", interval = "1d")
79
+ df = pd.DataFrame(data)
80
+
81
+ # Reproducing the data on which the model was evaluated:
81
+ # to run the model on the exact data it was evaluated and predicted on, uncomment the 2 lines below.
82
+ # With start = "2020-08-26" and end = "2022-08-26", Yahoo Finance returns data from 25-08-2020 to 25-08-2022 (a 2-year period);
83
+ # the model was evaluated on those data.
85
+
86
+ #data = yf.download(tickers = str(Ticker),end="2022-08-26", start="2020-08-26")
87
+ #df = pd.DataFrame(data)
88
+
89
+ return df
90
+
91
+
92
+ def article_parser(self, ticker):
93
+ '''
94
+ This method gets as input a stock ticker, creates the www.marketwatch.com link of this stock
95
+ and returns a dataframe with up to 17 of its most recent article headlines.
96
+ '''
97
+ company_ticker = self.Ticker(tick=ticker)
98
+ link_1 = 'https://www.marketwatch.com/investing/stock/'
99
+ link_2 = '?mod=search_symbol'
100
+ # Pasting the above 3 parts to create the URL
101
+ final_link = link_1 + company_ticker + link_2
102
+
103
+
104
+ page = requests.get(final_link)
105
+ soup = BeautifulSoup(page.content, "html.parser")
106
+ results = soup.find("div", class_="tab__pane is-active j-tabPane")
107
+ articles = results.find_all("a", class_="link")
108
+
109
+ headerList = ["ticker", "headline"]
110
+ rows = []
111
+ counter = 1
112
+ df_headers = pd.DataFrame()
113
+
114
+ for art in articles:
115
+ if counter <= 17:
116
+ ticker = company_ticker
117
+ title = art.text.strip()
118
+ if title is None:
119
+ break
120
+ rows.append([ticker, title])
121
+ counter = counter + 1
122
+
123
+ df_headers = pd.DataFrame(rows, columns=headerList)
124
+
125
+ return df_headers
126
+
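A short usage sketch of the Stock_Data helpers above, mirroring how app.py and functions.py call them (the ticker is only an example, and live access to Yahoo Finance and MarketWatch is assumed):

from stocks import Stock_Data

s = Stock_Data()
ticker = s.Ticker(tick="AAPL")           # store and return the chosen ticker
print(s.status_getter(ticker))           # e.g. "Open", "After Hours" or "Market Closed"
print(s.current_price_getter(ticker))    # float when the market is open, "Market Closed" otherwise
print(s.article_parser(ticker).head())   # DataFrame with up to 17 recent headlines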