"""Streamlit app that fits two regressors on historical prices and charts a forecast.

Daily price history is downloaded with yfinance, a handful of technical
features are derived from it, a linear regression and a random forest are
trained on those features, and the average of their predictions is shown
for the days following the last available bar.
"""

from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import streamlit as st
import yfinance as yf
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Columns fed to the models, in the order they are fitted and later transformed.
FEATURE_COLUMNS = ['Open', 'High', 'Low', 'Volume', 'MA_10', 'MA_50', 'RSI', 'Return']

# Number of calendar days after the last bar for which a prediction is produced.
FORECAST_DAYS = 14


def compute_rsi(data, window):
    """Return the Relative Strength Index of a price series.

    Gains and losses are averaged with a plain rolling mean (not an
    exponentially smoothed one) using ``min_periods=1``, so a value is
    produced from the very first price change onwards.

    Args:
        data: pandas Series of prices.
        window: Number of periods in the rolling average.

    Returns:
        pandas Series of RSI values. The first observation of ``data`` has
        no preceding price and is therefore absent from the result's index.
        Where the average loss is zero and the average gain is positive the
        RSI is 100; where both are zero the result is NaN.
    """
    diff = data.diff(1).dropna()
    gain = diff.where(diff > 0, 0)
    loss = -diff.where(diff < 0, 0)
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))


# Set up the Streamlit app
st.title("Stock Price Prediction")
st.write("This app uses historical data to predict future stock prices.")

# User input for stock ticker symbol
ticker = st.text_input("Enter the stock ticker symbol:", value='AAPL')
if not ticker.strip():
    st.warning("Please enter a stock ticker symbol.")
    st.stop()

# Fetch stock historical data using yfinance
stock_data = yf.download(ticker, start='2020-01-01', end=datetime.today().strftime('%Y-%m-%d'))
if stock_data is None or stock_data.empty:
    # An unknown symbol yields an empty frame; stop before indexing into it.
    st.error(f"No historical data found for {ticker}.")
    st.stop()

# Some yfinance versions return (field, ticker) MultiIndex columns even for a
# single symbol; keep only the field level so 'Close', 'Open', ... are Series.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)
stock_data.reset_index(inplace=True)

# Display the historical data
st.write(f"Historical Data for {ticker}")
st.dataframe(stock_data.tail())

# Feature engineering
stock_data['MA_10'] = stock_data['Close'].rolling(window=10).mean()
stock_data['MA_50'] = stock_data['Close'].rolling(window=50).mean()
stock_data['RSI'] = compute_rsi(stock_data['Close'], window=14)
stock_data['Return'] = stock_data['Close'].pct_change()
# The 50-day moving average is NaN for the first 49 rows, so they are dropped here.
stock_data.dropna(inplace=True)
if len(stock_data) < 2:
    st.error(f"Not enough historical data for {ticker} to train a model.")
    st.stop()

# Prepare features and target variable
X = stock_data[FEATURE_COLUMNS]
y = stock_data['Close']

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Split the data. NOTE: the hold-out part is not evaluated anywhere below; the
# split is kept so the models train on the same 80% subset as before.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Train the Linear Regression model
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)

# Train the Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Predict future prices using ensemble method
last_date = stock_data['Date'].iloc[-1]
future_dates = [last_date + timedelta(days=offset) for offset in range(1, FORECAST_DAYS + 1)]

# Every future row reuses the last observed feature values, so all forecast
# days share one feature vector and the predicted series is flat.
last_features = stock_data[FEATURE_COLUMNS].iloc[-1]
future_df = pd.DataFrame(
    [last_features.to_numpy()] * len(future_dates),
    index=future_dates,
    columns=FEATURE_COLUMNS,
)

# Apply the same imputation and scaling that were fitted on the training data
future_X = imputer.transform(future_df[FEATURE_COLUMNS])
future_X_scaled = scaler.transform(future_X)
lr_predictions = lr_model.predict(future_X_scaled)
rf_predictions = rf_model.predict(future_X_scaled)

# Combine predictions (average)
combined_predictions = (lr_predictions + rf_predictions) / 2

# Display predictions
predictions_df = pd.DataFrame({'Date': future_dates, 'Predicted Close': combined_predictions})
predictions_df.set_index('Date', inplace=True)
st.write(f"Future Price Predictions for {ticker}")
st.dataframe(predictions_df)

# Plot the predictions
st.line_chart(predictions_df['Predicted Close'])