# Spaces: BetterMindLabs / Stock_Market_Prediction (Sleeping)
# File size: 4,786 Bytes

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
import yfinance as yf
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Streamlit app: page header and user inputs.

# Column of the price history that serves as both the feature and the value
# being forecast.
forecast_col = "Close"

# Title and a short usage note shown at the top of the page.
st.title("CUSTOM Stock Price Prediction 💰")
st.write(
    'This model predicts based on trends. It may not perform well with volatile history. Setting the time frame to "max" is recommended. Your predicted days value cannot exceed the time frame days. Have fun!'
)

# Input widgets, rendered in this order: ticker, history window, horizon.
stock = st.text_input(label="Stock ticker symbol", value="NVDA")
daysago = st.text_input(
    label='Time frame in days (write "max" for maximum time)',
    value="max",
)
forecast_out = st.number_input(label="Predicted days", min_value=1, value=24)


def prepare_data(df, forecast_col, forecast_out):
    """Build train/test splits and forecast inputs for a shifted-label model.

    Each training sample pairs the standardised value of ``forecast_col`` on
    one row with the raw value of the same column ``forecast_out`` rows
    later. The last ``forecast_out`` rows have no known future value, so they
    are held back and returned as the inputs to predict on.

    Args:
        df: Price history containing a ``forecast_col`` column.
        forecast_col: Name of the column used as both feature and target.
        forecast_out: Number of rows ahead to predict; must be at least 1.

    Returns:
        ``(X_train, X_test, Y_train, Y_test, X_lately)`` on success. When the
        data cannot support the requested horizon, an error is shown with
        ``st.error`` and a tuple of five ``None`` values is returned.
    """
    failure = (None, None, None, None, None)

    # A non-positive horizon would make the negative slices below select
    # "everything"/"nothing" and misalign features with labels.
    insufficient = df.empty or forecast_out < 1
    if not insufficient:
        # Drop rows with a missing price up front. Removing NaNs from the
        # label alone (as a plain dropna on the shifted series would) leaves
        # the feature and target arrays with different lengths and lets NaN
        # reach the regression.
        clean = df.dropna(subset=[forecast_col])
        insufficient = len(clean) <= forecast_out
    if insufficient:
        st.error("Insufficient data available for the given forecast period.")
        return failure

    # Standardise the single feature column over the whole cleaned history.
    X = preprocessing.scale(np.array(clean[[forecast_col]]))
    X_lately = X[-forecast_out:]  # Rows whose future value is unknown
    X = X[:-forecast_out]  # Rows that have a known future value

    # Target for row i is the raw price forecast_out rows later; the trailing
    # forecast_out entries of the shifted series are NaN and are trimmed.
    y = np.array(clean[forecast_col].shift(-forecast_out).iloc[:-forecast_out])

    # Need at least two samples to split into train and test sets.
    if len(X) < 2 or len(y) < 2:
        st.error(
            "Not enough data for splitting into training and testing sets. Please adjust the date range or prediction period."
        )
        return failure

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    return X_train, X_test, Y_train, Y_test, X_lately


# Button to trigger model generation and prediction
if st.button("Generate"):
    # Normalise the time-frame text so inputs such as "Max" or " max " are
    # recognised instead of being rejected as a non-numeric value.
    timeframe = daysago.strip().lower()
    if timeframe == "max":
        period = "max"
    else:
        try:
            n_days = int(timeframe)
        except ValueError:
            st.error("Invalid time frame. Please enter a number or 'max'.")
            st.stop()  # Halts this script run; nothing below executes
        if n_days < 1:
            # Zero or negative windows would be sent to yfinance as "0d"/"-5d".
            st.error("Invalid time frame. The number of days must be at least 1.")
            st.stop()
        # Day counts are passed to yfinance as a string such as "30d"
        period = f"{n_days}d"

    # Fetch stock data (surrounding whitespace in the symbol is ignored)
    symbol = stock.strip()
    data = yf.Ticker(symbol).history(period=period)

    if data.empty:
        st.error(
            "Failed to retrieve data for the ticker symbol. Please check the stock symbol and try again."
        )
    else:
        X_train, X_test, Y_train, Y_test, X_lately = prepare_data(
            data, forecast_col, forecast_out
        )

        # prepare_data returns None placeholders (after showing its own
        # error) when the history is too short for the requested horizon.
        if X_train is not None:
            # Fit on the training split, score on the held-out split, then
            # predict for the most recent rows that have no known future value.
            learner = LinearRegression()
            learner.fit(X_train, Y_train)
            score = learner.score(X_test, Y_test)
            forecast = learner.predict(X_lately)

            # NOTE: for LinearRegression, score() is the R^2 coefficient of
            # determination on the test split, not a classification accuracy.
            st.write("Accuracy Score:", score)

            # One predicted value per calendar day following the last
            # observed date.
            future_dates = pd.date_range(
                start=data.index[-1] + pd.Timedelta(days=1),
                periods=forecast_out,
                freq="D",
            )
            predicted_data = pd.DataFrame(
                {"Date": future_dates, "Predicted Close": forecast}
            )

            # Align history and predictions side by side on the date index;
            # the forecast column is renamed for the chart legend.
            combined_data = pd.concat(
                [
                    data.rename(columns={forecast_col: "Actual Close"}),
                    predicted_data.set_index("Date"),
                ],
                axis=1,
            )

            # Plot original and predicted stock prices
            fig = px.line(
                combined_data,
                x=combined_data.index,
                y=["Actual Close", "Predicted Close"],
                title=f"Predicted {symbol} Stock Prices",
            )
            fig.update_layout(
                xaxis_title="Date", yaxis_title="Price", legend_title_text=""
            )

            # The second trace is "Predicted Close"; draw it in orange.
            fig.data[1].line.color = "orange"

            st.plotly_chart(fig)

            st.write(
                "Findings: Linear Regression often performs poorly on volatile stock prices, so this model may not be highly accurate for certain stocks. Consider using deep learning methods for improved accuracy on volatile stocks."
            )