solanaexpert
/

MLCryptoForecaster

Model card Files Files and versions

MLCryptoForecaster / MLCryptoForecaster.py

solanaexpert's picture

Create MLCryptoForecaster.py

92e4cbc verified 11 months ago

history blame contribute delete

3.61 kB

	import os
	import pandas as pd
	import numpy as np
	from datetime import datetime, timedelta
	from binance.client import Client
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import classification_report
	import ta

	# Market and candle size this script works with
	symbol = "BTCUSDT"
	interval = Client.KLINE_INTERVAL_4HOUR

	# Local CSV file that caches the downloaded historical candles
	DATA_FILE = "btc_data.csv"

	# Binance client created without API keys.
	# For live use, supply your own keys: Client(api_key, api_secret)
	client = Client()

	# Column names assigned to the 12 fields of each raw kline row
	KLINE_COLUMNS = ['timestamp', 'open', 'high', 'low', 'close', 'volume',
	                 'close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore']
	# Numeric columns kept in the cached dataset
	PRICE_COLUMNS = ['open', 'high', 'low', 'close', 'volume']


	def _klines_to_frame(klines):
	    """Convert raw kline rows into a float OHLCV DataFrame indexed by open time.

	    Args:
	        klines: Sequence of 12-field kline rows as returned by
	            ``client.get_historical_klines``.

	    Returns:
	        DataFrame with float columns open/high/low/close/volume and a
	        ``timestamp`` datetime index parsed from epoch milliseconds.
	    """
	    frame = pd.DataFrame(klines, columns=KLINE_COLUMNS)
	    # .copy() so the column assignments below act on an independent frame
	    frame = frame[['timestamp'] + PRICE_COLUMNS].copy()
	    frame[PRICE_COLUMNS] = frame[PRICE_COLUMNS].astype(float)
	    frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')
	    return frame.set_index('timestamp')


	# Load the cached data if there is any; otherwise download everything
	df = None
	if os.path.exists(DATA_FILE):
	    print("Loading existing data...")
	    df = pd.read_csv(DATA_FILE, index_col=0, parse_dates=True)
	    if df.empty:
	        # A cache without rows gives no starting point; treat it as missing
	        df = None

	if df is not None:
	    # Re-request from the last cached candle itself (not after it): that
	    # candle may have been saved before it closed, so its values can be stale.
	    last_timestamp = df.index[-1]
	    start_str = last_timestamp.strftime("%d %B %Y %H:%M:%S")

	    print(f"Downloading new data from {start_str}...")
	    new_klines = client.get_historical_klines(symbol, interval, start_str)
	    if new_klines:
	        new_df = _klines_to_frame(new_klines)

	        # Append; on overlapping timestamps keep the freshly downloaded row
	        df = pd.concat([df, new_df])
	        df = df[~df.index.duplicated(keep='last')]
	        df.to_csv(DATA_FILE)
	else:
	    print("Downloading all data from scratch...")
	    klinesT = client.get_historical_klines(symbol, interval, "01 December 2021")
	    df = _klines_to_frame(klinesT)
	    df.to_csv(DATA_FILE)

	# Feature engineering: every indicator is derived from the close price
	close = df['close']
	rsi_indicator = ta.momentum.RSIIndicator(close, window=14)
	macd_indicator = ta.trend.MACD(close)

	df['rsi'] = rsi_indicator.rsi()
	df['sma_fast'] = close.rolling(window=5).mean()
	df['sma_slow'] = close.rolling(window=20).mean()
	df['macd'] = macd_indicator.macd()
	df['ema'] = close.ewm(span=10, adjust=False).mean()

	# Target label: 1 when the following candle closes higher than this one, else 0
	next_close = close.shift(-1)
	df['target'] = np.where(next_close > close, 1, 0)

	# Discard every row that still contains a NaN in any column
	df = df.dropna()

	# Features and Target
	features = ['rsi', 'sma_fast', 'sma_slow', 'macd', 'ema']
	X = df[features]
	y = df['target']

	# The final row has no following close, so its target is only a placeholder
	# (the comparison against a missing value yields 0). Keep it out of training
	# and evaluation; it is used solely as the input for the prediction below.
	X_labeled = X.iloc[:-1]
	y_labeled = y.iloc[:-1]

	# Chronological train/test split (no shuffling, so the test set is the most recent 20%)
	X_train, X_test, y_train, y_test = train_test_split(
	    X_labeled, y_labeled, test_size=0.2, shuffle=False
	)

	# Train Random Forest
	model = RandomForestClassifier(n_estimators=100, random_state=42)
	model.fit(X_train, y_train)

	# Evaluate on the held-out rows
	y_pred = model.predict(X_test)
	print(classification_report(y_test, y_pred))

	# Predict next movement from the latest row; a one-row DataFrame keeps the
	# column names the model was fitted with.
	latest_features = X.iloc[[-1]]
	predicted_direction = model.predict(latest_features)
	print(f"Predicted next movement: {'UP' if predicted_direction[0] == 1 else 'DOWN'}")