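# NOTE: the next three lines are page-status text captured with the code, not program source.
# Spaces:
# Sleeping
# Sleeping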
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import streamlit as st
import yfinance as yf
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Function to compute RSI
def compute_rsi(data: pd.Series, window: int) -> pd.Series:
    """Return the Relative Strength Index of a price series.

    The averages are plain rolling means of the one-period gains and
    losses (no exponential smoothing).

    Args:
        data: Price series; the caller in this file passes the ``Close``
            column.
        window: Number of periods in each rolling average.

    Returns:
        RSI values on a 0-100 scale. The result has no entry for rows whose
        one-period difference is undefined (always the first row), so it is
        shorter than ``data``; assigning it to a DataFrame column aligns on
        the index and leaves those rows as NaN. When a window contains no
        losses but some gains the value is 100; when it contains neither
        gains nor losses (flat prices) the value is NaN.
    """
    # One-period price change; rows without a predecessor are dropped.
    diff = data.diff(1).dropna()

    # Split the changes into non-negative gain and loss magnitudes.
    gain = diff.where(diff > 0, 0)
    loss = -diff.where(diff < 0, 0)

    # min_periods=1 lets the first rows average over fewer than `window`
    # observations instead of being NaN.
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()

    # Division by a zero average loss yields inf (RSI 100) or NaN (0 / 0)
    # rather than raising.
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi
# Set up the Streamlit app
st.title("Stock Price Prediction")
st.write("This app uses historical data to predict future stock prices.")

# User input for stock ticker symbol. Surrounding whitespace is stripped so a
# stray space does not turn a valid symbol into a failed lookup.
ticker = st.text_input("Enter the stock ticker symbol:", value='AAPL').strip()
if not ticker:
    st.warning("Please enter a stock ticker symbol.")
    st.stop()

# Fetch stock historical data using yfinance, from 2020-01-01 up to today's date.
stock_data = yf.download(ticker, start='2020-01-01', end=datetime.today().strftime('%Y-%m-%d'))

# An unknown symbol or a failed request leaves nothing to model; stop this
# script run with a message instead of failing further down.
if stock_data is None or stock_data.empty:
    st.error(f"No historical data found for '{ticker}'. Check the symbol and try again.")
    st.stop()

# Some yfinance releases return two-level columns (field, ticker) even for a
# single symbol. Keep only the field level so that 'Close', 'Open', ... select
# plain Series. This must happen before reset_index so the date column gets a
# simple name.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)

# Move the date index into a regular column.
stock_data.reset_index(inplace=True)

# Display the historical data
st.write(f"Historical Data for {ticker}")
st.dataframe(stock_data.tail())
# Feature engineering
# 10- and 50-row rolling means of the close; the first 9 / 49 rows are NaN.
stock_data['MA_10'] = stock_data['Close'].rolling(window=10).mean()
stock_data['MA_50'] = stock_data['Close'].rolling(window=50).mean()
stock_data['RSI'] = compute_rsi(stock_data['Close'], window=14)
# Row-over-row fractional change of the close.
stock_data['Return'] = stock_data['Close'].pct_change()
# Drop every row that has a NaN in any column (this removes the warm-up rows
# of the rolling features).
stock_data.dropna(inplace=True)

# The split below needs at least two rows; a short price history can leave
# fewer than that once the warm-up rows are removed.
if len(stock_data) < 2:
    st.error("Not enough historical data to compute the model features.")
    st.stop()

# Prepare features and target variable
X = stock_data[['Open', 'High', 'Low', 'Volume', 'MA_10', 'MA_50', 'RSI', 'Return']]
y = stock_data['Close']

# Handle missing values. The fitted imputer is kept because it is applied
# again to the future feature rows; fit_transform turns X into a NumPy array.
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)
# Split the data: 80% train / 20% test with a fixed seed for repeatability.
# NOTE(review): X_test_scaled and y_test are not used again in the visible
# code, so no hold-out evaluation is actually performed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features. The scaler is fitted on the training rows only and then
# reused for the test rows and, later, for the future feature rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the Linear Regression model
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)

# Train the Random Forest model (100 trees, fixed seed)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)
# Predict future prices using ensemble method
feature_columns = ['Open', 'High', 'Low', 'Volume', 'MA_10', 'MA_50', 'RSI', 'Return']

# Look 14 calendar days past the last observed date, keeping weekdays only:
# there is no close on Saturday/Sunday. Exchange holidays are not handled.
last_date = stock_data['Date'].iloc[-1]
future_dates = [
    day
    for day in (last_date + timedelta(days=offset) for offset in range(1, 15))
    if day.weekday() < 5
]

# Every future row carries the most recent observed value of each feature.
future_df = pd.DataFrame(index=future_dates, columns=stock_data.columns)
for column in feature_columns:
    future_df[column] = stock_data[column].iloc[-1]
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
future_df = future_df.ffill()

# Handle missing values in future data, then apply the scaler fitted on the
# training rows.
future_X = imputer.transform(future_df[feature_columns])
future_X_scaled = scaler.transform(future_X)
lr_predictions = lr_model.predict(future_X_scaled)
rf_predictions = rf_model.predict(future_X_scaled)

# Combine predictions (average)
# NOTE(review): all future rows have identical feature values, so both models
# return the same number for every date and the forecast is a flat line.
combined_predictions = (lr_predictions + rf_predictions) / 2
# Display predictions as a table indexed by date
predictions_df = pd.DataFrame({'Date': future_dates, 'Predicted Close': combined_predictions})
predictions_df.set_index('Date', inplace=True)
st.write(f"Future Price Predictions for {ticker}")
st.dataframe(predictions_df)

# Plot the predictions
st.line_chart(predictions_df['Predicted Close'])