"""Demo trading pipeline: market data, news sentiment, LSTM forecast, paper trade.

Running the module as a script executes these steps in order:

1. Download one year of daily prices with yfinance and save them to CSV.
2. Fetch recent news articles from NewsAPI and save them to CSV.
3. Label each article description with a sentiment pipeline.
4. Train a small LSTM on the closing prices and save the model.
5. Predict the next close and submit a one-share market order to the Alpaca
   paper-trading endpoint.
"""
import logging
import os
from datetime import datetime, timedelta

import alpaca_trade_api as tradeapi
import numpy as np
import pandas as pd
import tensorflow as tf  # noqa: F401  (kept: imported by the original file)
import yfinance as yf
from newsapi import NewsApiClient
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from transformers import pipeline

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Number of consecutive closing prices that form one model input window.
# Training and inference must use the same value because the model is built
# with a fixed input shape.
LOOKBACK_DAYS = 60

# Fallback strings used when an environment variable is absent. They are not
# usable credentials, so the validation below treats them as "missing".
_PLACEHOLDER_KEYS = frozenset({
    'your_newsapi_key',
    'your_alpaca_api_key',
    'your_alpaca_secret_key',
})

# Load environment variables with fallback
NEWSAPI_KEY = os.getenv('NEWSAPI_KEY', 'your_newsapi_key')
ALPACA_API_KEY = os.getenv('ALPACA_API_KEY', 'your_alpaca_api_key')
ALPACA_SECRET_KEY = os.getenv('ALPACA_SECRET_KEY', 'your_alpaca_secret_key')
APCA_API_KEY_ID = os.getenv('APCA_API_KEY_ID', ALPACA_API_KEY)
APCA_API_SECRET_KEY = os.getenv('APCA_API_SECRET_KEY', ALPACA_SECRET_KEY)

# Check if all necessary keys are available. A key that is empty or still
# equal to one of the placeholder fallbacks counts as unavailable; without
# the placeholder test this check could never fail for an unset variable.
if any(
    not key or key in _PLACEHOLDER_KEYS
    for key in (NEWSAPI_KEY, APCA_API_KEY_ID, APCA_API_SECRET_KEY)
):
    raise ValueError("Ensure all API keys and secret keys are set as environment variables.")

# Initialize NewsAPI client
newsapi = NewsApiClient(api_key=NEWSAPI_KEY)

# Initialize Alpaca Trade API client (paper-trading endpoint)
alpaca_api = tradeapi.REST(
    APCA_API_KEY_ID,
    APCA_API_SECRET_KEY,
    base_url='https://paper-api.alpaca.markets',
)


def collect_market_data(ticker):
    """Download the last 365 days of prices for *ticker* and save them to CSV.

    The data is written to ``{ticker}_market_data.csv`` with the date index
    stored in a column labelled ``Date``.

    Args:
        ticker: Symbol passed to ``yf.download``.

    Raises:
        ValueError: If the download returns no rows.
    """
    now = datetime.now()
    data = yf.download(
        ticker,
        start=(now - timedelta(days=365)).strftime('%Y-%m-%d'),
        end=now.strftime('%Y-%m-%d'),
    )
    if data is None or data.empty:
        raise ValueError(f'No market data returned for {ticker}.')

    # Some yfinance versions return (field, ticker) MultiIndex columns even
    # for a single symbol. Saving that as-is produces a multi-row CSV header
    # that the later `data[['Date', 'Close']]` lookup cannot read, so keep
    # only the field level.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    data.to_csv(f'{ticker}_market_data.csv', index_label='Date')
    logger.info('Market data for %s collected successfully.', ticker)


def collect_news_data(query, from_date, to_date):
    """Fetch English articles matching *query* and save them to ``news_data.csv``.

    Args:
        query: Search phrase passed to NewsAPI as ``q``.
        from_date: Start date string passed as ``from_param``.
        to_date: End date string passed as ``to``.
    """
    all_articles = newsapi.get_everything(
        q=query,
        from_param=from_date,
        to=to_date,
        language='en',
        sort_by='relevancy',
    )
    if all_articles['status'] != 'ok':
        logger.error('Error collecting news data: %s', all_articles.get('message'))
        return

    articles_df = pd.DataFrame(all_articles['articles'])
    # index=False keeps the positional index out of the file so it does not
    # reappear as an "Unnamed: 0" column in the sentiment output.
    articles_df.to_csv('news_data.csv', index=False)
    logger.info('News data for %s collected successfully.', query)


def perform_sentiment_analysis():
    """Label each article in ``news_data.csv`` and write ``sentiment_data.csv``.

    The ``description`` column is classified with the default
    ``sentiment-analysis`` pipeline; rows with a missing description are
    labelled ``'NEUTRAL'``. Failures while reading, classifying or writing are
    logged and not re-raised, so this step is best-effort.
    """
    sentiment_pipeline = pipeline("sentiment-analysis")

    def _label(text):
        """Return the pipeline label for *text*, or 'NEUTRAL' when missing."""
        if pd.notna(text):
            return sentiment_pipeline(text)[0]['label']
        return 'NEUTRAL'

    try:
        news_data = pd.read_csv('news_data.csv')
        news_data['sentiment'] = news_data['description'].apply(_label)
        news_data.to_csv('sentiment_data.csv', index=False)
        logger.info('Sentiment analysis performed successfully.')
    except Exception as e:  # best-effort step: log and continue
        logger.error('Error performing sentiment analysis: %s', e)


def train_price_prediction_model(ticker):
    """Train an LSTM on the saved closing prices and save it to disk.

    Reads ``{ticker}_market_data.csv``, scales the ``Close`` column to
    ``[0, 1]``, builds windows of ``LOOKBACK_DAYS`` consecutive values with
    the following value as the target, fits a two-layer LSTM for one epoch and
    saves it to ``{ticker}_price_prediction_model.h5``.

    Args:
        ticker: Symbol used to locate the CSV and name the model file.

    Raises:
        ValueError: If there are not more than ``LOOKBACK_DAYS`` usable rows,
            so no training window can be formed.
    """
    data = pd.read_csv(f'{ticker}_market_data.csv')
    # Drop rows without a close so NaNs do not propagate into the loss.
    closes = data[['Date', 'Close']].set_index('Date').dropna()
    if len(closes) <= LOOKBACK_DAYS:
        raise ValueError(
            f'Need more than {LOOKBACK_DAYS} closing prices to train, '
            f'got {len(closes)}.'
        )

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(closes)

    # Each sample is LOOKBACK_DAYS consecutive scaled closes; the target is
    # the scaled close that immediately follows the window.
    windows = np.array([
        scaled_data[i - LOOKBACK_DAYS:i, 0]
        for i in range(LOOKBACK_DAYS, len(scaled_data))
    ])
    y = scaled_data[LOOKBACK_DAYS:, 0]
    X = windows.reshape(windows.shape[0], windows.shape[1], 1)

    model = keras.Sequential([
        keras.layers.LSTM(50, return_sequences=True, input_shape=(X.shape[1], 1)),
        keras.layers.LSTM(50, return_sequences=False),
        keras.layers.Dense(25),
        keras.layers.Dense(1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, y, batch_size=1, epochs=1)
    model.save(f'{ticker}_price_prediction_model.h5')
    logger.info('Price prediction model trained successfully.')


def make_trade_decision(ticker):
    """Predict the next close and submit a one-share market order.

    Loads the saved model and the saved market data, predicts the next closing
    price from the most recent ``LOOKBACK_DAYS`` closes, and compares it with
    the latest close downloaded from yfinance. A prediction above the current
    price submits a buy order; otherwise a sell order is submitted.

    NOTE(review): the sell branch does not check for an existing position, so
    it may open a short position — confirm that is intended.

    Args:
        ticker: Symbol to trade; also locates the model and CSV files.

    Raises:
        ValueError: If fewer than ``LOOKBACK_DAYS`` closes are stored, or if
            no current price could be downloaded.
    """
    model = keras.models.load_model(f'{ticker}_price_prediction_model.h5')
    data = pd.read_csv(f'{ticker}_market_data.csv')
    closes = data['Close'].dropna().to_numpy(dtype=float).reshape(-1, 1)
    if len(closes) < LOOKBACK_DAYS:
        raise ValueError(
            f'Need at least {LOOKBACK_DAYS} closing prices to predict, '
            f'got {len(closes)}.'
        )

    # Fit ONE scaler on the same series the model was trained on and reuse it
    # for both directions. The original code called inverse_transform on a
    # brand-new, unfitted scaler (which raises NotFittedError) and scaled the
    # input window with different min/max values than training used.
    scaler = MinMaxScaler(feature_range=(0, 1)).fit(closes)
    window = scaler.transform(closes[-LOOKBACK_DAYS:])
    X_test = window.reshape(1, LOOKBACK_DAYS, 1)
    predicted_scaled = model.predict(X_test)
    predicted_price = float(scaler.inverse_transform(predicted_scaled)[0, 0])

    latest = yf.download(ticker, period='1d')
    if latest is None or latest.empty:
        raise ValueError(f'No current price returned for {ticker}.')
    # ravel() handles both a plain Series and the single-column DataFrame
    # produced when yfinance returns MultiIndex columns.
    latest_closes = np.asarray(latest['Close'], dtype=float).ravel()
    if latest_closes.size == 0:
        raise ValueError(f'No current price returned for {ticker}.')
    current_price = float(latest_closes[-1])

    side = 'buy' if predicted_price > current_price else 'sell'
    alpaca_api.submit_order(
        symbol=ticker,
        qty=1,
        side=side,
        type='market',
        time_in_force='gtc',
    )
    logger.info('%s 1 share of %s', 'Bought' if side == 'buy' else 'Sold', ticker)


def main():
    """Run the full pipeline for Apple over the last 30 days of news."""
    ticker = 'AAPL'
    query = 'Apple Inc'
    now = datetime.now()
    from_date = (now - timedelta(days=30)).strftime('%Y-%m-%d')
    to_date = now.strftime('%Y-%m-%d')

    collect_market_data(ticker)
    collect_news_data(query, from_date, to_date)
    perform_sentiment_analysis()
    train_price_prediction_model(ticker)
    make_trade_decision(ticker)


if __name__ == "__main__":
    main()