|
import numpy as np |
|
import pandas as pd |
|
import plotly.express as px |
|
import streamlit as st |
|
import yfinance as yf |
|
from sklearn import preprocessing |
|
from sklearn.linear_model import LinearRegression |
|
from sklearn.model_selection import train_test_split |
|
|
|
|
|
# Page header and short usage guidance.
# The money-bag emoji is written as a Unicode escape so the title renders
# correctly even if this file is saved or transferred with the wrong encoding.
st.title("CUSTOM Stock Price Prediction \U0001F4B0")
st.write(
    'This model predicts based on trends. It may not perform well with volatile history. Setting the time frame to "max" is recommended. Your predicted days value cannot exceed the time frame days. Have fun!'
)

# User inputs. These module-level names are read by the "Generate" handler.
stock = st.text_input("Stock ticker symbol", value="NVDA")
daysago = st.text_input(
    'Time frame in days (write "max" for maximum time)', value="max"
)
# Number of rows ahead to predict; the widget enforces a minimum of 1.
forecast_out = st.number_input("Predicted days", value=24, min_value=1)
# Column used both as the model feature and as the source of the label.
forecast_col = "Close"
|
|
|
|
|
def prepare_data(df, forecast_col, forecast_out):
    """Build train/test splits and forecast inputs for a shifted-label regression.

    The only feature is the standardized ``forecast_col`` value. The label for
    each row is the ``forecast_col`` value ``forecast_out`` rows later, so the
    last ``forecast_out`` rows have no label and are returned separately as
    the inputs to forecast from.

    Args:
        df: Price history containing ``forecast_col``, ordered oldest first.
        forecast_col: Name of the column used as feature and label source.
        forecast_out: Number of rows ahead to predict; must be at least 1.

    Returns:
        ``(X_train, X_test, Y_train, Y_test, X_lately)``. When the input cannot
        be used, an error is shown with ``st.error`` and all five values are
        ``None``.
    """
    failure = (None, None, None, None, None)
    insufficient_msg = "Insufficient data available for the given forecast period."
    split_msg = "Not enough data for splitting into training and testing sets. Please adjust the date range or prediction period."

    if df.empty or len(df) <= forecast_out:
        st.error(insufficient_msg)
        return failure

    # A zero or negative horizon would make the slices below select the wrong
    # rows (``X[-0:]`` is the whole array), so reject it explicitly.
    forecast_out = int(forecast_out)
    if forecast_out < 1:
        st.error(split_msg)
        return failure

    # Drop missing prices before building features and labels; otherwise the
    # two arrays end up with different lengths and NaNs reach the model.
    prices = df[forecast_col].dropna().to_numpy(dtype=float)
    if len(prices) <= forecast_out:
        st.error(insufficient_msg)
        return failure

    # Standardize over the full series, then split off the unlabeled tail.
    scaled = preprocessing.scale(prices.reshape(-1, 1))
    X_lately = scaled[-forecast_out:]
    X = scaled[:-forecast_out]
    # Label for row i is the price at row i + forecast_out.
    y = prices[forecast_out:]

    if len(X) < 2 or len(y) < 2:
        st.error(split_msg)
        return failure

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    return X_train, X_test, Y_train, Y_test, X_lately
|
|
|
|
|
|
|
if st.button("Generate"):
    # Turn the free-text time frame into a history period string: either
    # "max" or "<n>d" for a positive whole number of days.
    period = daysago.strip().lower()
    if period != "max":
        try:
            n_days = int(period)
        except ValueError:
            n_days = 0
        if n_days < 1:
            st.error("Invalid time frame. Please enter a number or 'max'.")
            st.stop()
        period = f"{n_days}d"

    ticker = yf.Ticker(stock)
    data = ticker.history(period=period)

    if data.empty:
        st.error(
            "Failed to retrieve data for the ticker symbol. Please check the stock symbol and try again."
        )
    else:
        X_train, X_test, Y_train, Y_test, X_lately = prepare_data(
            data, forecast_col, forecast_out
        )

        # prepare_data returns None values after reporting its own error.
        if X_train is not None:
            learner = LinearRegression()
            learner.fit(X_train, Y_train)
            # LinearRegression.score is the R^2 on the held-out split.
            score = learner.score(X_test, Y_test)
            forecast = learner.predict(X_lately)

            st.write("Accuracy Score:", score)

            # Each prediction is one history row ahead of the previous one.
            # If the history has no weekend rows, step in business days so
            # forecasts are not dated on Saturdays and Sundays; otherwise
            # (e.g. instruments quoted every day) step in calendar days.
            trades_on_weekends = bool((data.index.dayofweek >= 5).any())
            future_dates = pd.date_range(
                start=data.index[-1] + pd.Timedelta(days=1),
                periods=len(forecast),
                freq="D" if trades_on_weekends else "B",
            )
            predicted_data = pd.DataFrame(
                {"Date": future_dates, "Predicted Close": forecast}
            )

            # Align history and forecast on the date index so both series can
            # be drawn on one chart; each has NaN where the other has data.
            combined_data = pd.concat(
                [
                    data.rename(columns={forecast_col: "Actual Close"}),
                    predicted_data.set_index("Date"),
                ],
                axis=1,
            )

            fig = px.line(
                combined_data,
                x=combined_data.index,
                y=["Actual Close", "Predicted Close"],
                title=f"Predicted {stock} Stock Prices",
            )
            fig.update_layout(
                xaxis_title="Date", yaxis_title="Price", legend_title_text=""
            )

            # Second trace is the prediction; make it stand out.
            fig.data[1].line.color = "orange"

            st.plotly_chart(fig)

            st.write(
                "Findings: Linear Regression often performs poorly on volatile stock prices, so this model may not be highly accurate for certain stocks. Consider using deep learning methods for improved accuracy on volatile stocks."
            )
|
|