# File-viewer page header captured with the source (not part of the program):
# Boltuzamaki's picture | commit 417c32f "minor edit" | raw | history blame | 4.79 kB
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
import yfinance as yf
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Streamlit app: page title and a short usage note.
# The title's trailing emoji was stored as mojibake (UTF-8 bytes decoded with a
# single-byte codepage); it is restored to the intended money-bag character.
st.title("CUSTOM Stock Price Prediction 💰")
st.write(
    'This model predicts based on trends. It may not perform well with volatile history. Setting the time frame to "max" is recommended. Your predicted days value cannot exceed the time frame days. Have fun!'
)

# Input widgets. These module-level names are read by the "Generate" handler.
stock = st.text_input("Stock ticker symbol", value="NVDA")
daysago = st.text_input(
    'Time frame in days (write "max" for maximum time)', value="max"
)
forecast_out = st.number_input("Predicted days", value=24, min_value=1)

# Column of the price history used both as the feature and as the target.
forecast_col = "Close"
def prepare_data(df, forecast_col, forecast_out):
    """Build train/test splits and the rows to forecast from.

    The single feature is the standardized value of ``forecast_col``; the
    target for each row is the raw value of the same column ``forecast_out``
    rows later.

    Args:
        df: DataFrame that contains ``forecast_col``.
        forecast_col: Name of the column used as feature and target.
        forecast_out: Number of rows ahead to predict.

    Returns:
        ``(X_train, X_test, Y_train, Y_test, X_lately)`` where ``X_lately``
        holds the scaled last ``forecast_out`` rows (the rows that have no
        known target yet). If there is too little data, an error is shown
        with ``st.error`` and five ``None`` values are returned instead.
    """
    failure = (None, None, None, None, None)

    # Drop missing prices *before* pairing features with targets. Dropping
    # NaN only from the shifted label would remove different rows from y than
    # the tail trimmed from X, leaving the two arrays misaligned.
    if df.empty:
        prices = np.empty(0, dtype=float)
    else:
        prices = df[forecast_col].dropna().to_numpy(dtype=float)

    if len(prices) <= forecast_out:
        st.error("Insufficient data available for the given forecast period.")
        return failure

    # Standardize over the full history so the rows we predict from share the
    # same scale as the training rows.
    X = preprocessing.scale(prices.reshape(-1, 1))
    X_lately = X[-forecast_out:]  # The data that will be predicted on
    X = X[:-forecast_out]  # Training data (rows with a known future value)
    y = prices[forecast_out:]  # Value forecast_out rows after each X row

    # Need at least two samples to split into train and test sets.
    if len(X) < 2 or len(y) < 2:
        st.error(
            "Not enough data for splitting into training and testing sets. Please adjust the date range or prediction period."
        )
        return failure

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    return X_train, X_test, Y_train, Y_test, X_lately
# Button to trigger model generation and prediction
if st.button("Generate"):
    # Normalize the free-text time frame so "Max" or " max " are accepted.
    daysago = daysago.strip().lower()
    if daysago != "max":
        try:
            n_days = int(daysago)
        except ValueError:
            n_days = 0  # Funnel into the same rejection as non-positive input
        if n_days < 1:
            st.error("Invalid time frame. Please enter a number or 'max'.")
            st.stop()
        # Convert to the "<N>d" period string passed to yfinance
        daysago = f"{n_days}d"

    # Fetch stock data (surrounding whitespace would break the symbol lookup)
    ticker = yf.Ticker(stock.strip())
    data = ticker.history(period=daysago)

    if data.empty:
        st.error(
            "Failed to retrieve data for the ticker symbol. Please check the stock symbol and try again."
        )
    else:
        X_train, X_test, Y_train, Y_test, X_lately = prepare_data(
            data, forecast_col, forecast_out
        )

        # prepare_data returns None values after reporting its own error.
        if X_train is not None:
            # Model generation
            learner = LinearRegression()
            learner.fit(X_train, Y_train)
            # LinearRegression.score is the R^2 on the held-out split.
            score = learner.score(X_test, Y_test)
            forecast = learner.predict(X_lately)
            st.write("Accuracy Score:", score)

            # Create a DataFrame with future dates and predicted values,
            # starting the day after the last row of the fetched history.
            future_dates = pd.date_range(
                start=data.index[-1] + pd.Timedelta(days=1),
                periods=forecast_out,
                freq="D",
            )
            predicted_data = pd.DataFrame(
                {"Date": future_dates, "Predicted Close": forecast}
            )

            # Concatenate original data and predicted data column-wise so
            # both series share one date index.
            combined_data = pd.concat(
                [
                    data.rename(columns={"Close": "Actual Close"}),
                    predicted_data.set_index("Date"),
                ],
                axis=1,
            )

            # Plot original and predicted stock prices
            fig = px.line(
                combined_data,
                x=combined_data.index,
                y=["Actual Close", "Predicted Close"],
                title=f"Predicted {stock} Stock Prices",
            )
            fig.update_layout(
                xaxis_title="Date", yaxis_title="Price", legend_title_text=""
            )

            # Set line colors: trace 1 is the "Predicted Close" series
            fig.data[1].line.color = "orange"
            st.plotly_chart(fig)

            st.write(
                "Findings: Linear Regression often performs poorly on volatile stock prices, so this model may not be highly accurate for certain stocks. Consider using deep learning methods for improved accuracy on volatile stocks."
            )