← Back to Libraries
scikit-learn for Trading
Apply machine learning to trading with scikit-learn. Predict price movements, classify market regimes, and build intelligent trading strategies.
Difficulty: Advanced
Category: Machine Learning
Installation
$ pip install scikit-learn
Code Examples
Feature Engineering for Trading
Python
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import StandardScaler
def calculate_rsi(close, period=14):
    """Return the Relative Strength Index of a price series.

    Uses Wilder's smoothing (an exponential average with alpha = 1/period)
    of the upward and downward price changes.

    Args:
        close: pandas Series of prices.
        period: Look-back length in bars.

    Returns:
        pandas Series in the range 0-100, NaN until `period` changes exist.
    """
    delta = close.diff()
    gains = delta.clip(lower=0)
    losses = -delta.clip(upper=0)
    avg_gain = gains.ewm(alpha=1 / period, min_periods=period, adjust=False).mean()
    avg_loss = losses.ewm(alpha=1 / period, min_periods=period, adjust=False).mean()
    # avg_loss == 0 gives rs == inf, which maps to an RSI of exactly 100.
    rs = avg_gain / avg_loss
    return 100 - 100 / (1 + rs)


def calculate_atr(data, period=14):
    """Return the Average True Range of an OHLC frame.

    Args:
        data: DataFrame with 'High', 'Low' and 'Close' columns.
        period: Look-back length in bars.

    Returns:
        pandas Series of Wilder-smoothed true range, NaN until `period`
        bars are available.
    """
    prev_close = data['Close'].shift(1)
    true_range = pd.concat(
        [
            data['High'] - data['Low'],
            (data['High'] - prev_close).abs(),
            (data['Low'] - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)
    return true_range.ewm(alpha=1 / period, min_periods=period, adjust=False).mean()


# Download data
df = yf.download("EURUSD=X", period="2y")
if df.empty:
    raise ValueError("No price data returned for EURUSD=X")
# Recent yfinance releases may return (field, ticker) MultiIndex columns even
# for a single symbol; flatten them so df['Close'] is a plain Series.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Create features
df['Returns'] = df['Close'].pct_change()
df['SMA_10'] = df['Close'].rolling(10).mean()
df['SMA_50'] = df['Close'].rolling(50).mean()
df['RSI'] = calculate_rsi(df['Close'], 14)
df['ATR'] = calculate_atr(df, 14)

# Lag features: the previous 1..5 daily returns
for i in range(1, 6):
    df[f'Returns_Lag_{i}'] = df['Returns'].shift(i)

# Target: next day return direction (1 = up, 0 = flat/down).
# The last row has no next-day return; keep it as NaN so dropna() removes it
# instead of silently labelling it 0.
next_return = df['Returns'].shift(-1)
df['Target'] = (next_return > 0).astype(float).where(next_return.notna())

# Drop warm-up rows (rolling windows, lags) and the unlabeled final row
df = df.dropna().copy()
df['Target'] = df['Target'].astype(int)

# Scale features.
# Fit the scaler on the earliest 80% of rows only (the same chronological
# cut-off used for the train/test split) so that test-period means and
# variances do not leak into the training features.
scaler = StandardScaler()
feature_cols = ['SMA_10', 'SMA_50', 'RSI', 'ATR'] + [f'Returns_Lag_{i}' for i in range(1, 6)]
train_end = int(len(df) * 0.8)
scaler.fit(df[feature_cols].iloc[:train_end])
df[feature_cols] = scaler.transform(df[feature_cols])
print(df[feature_cols + ['Target']].head())
Price Direction Classification
Python
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
# Feature matrix and label vector
X = df[feature_cols]
y = df['Target']

# Chronological 80/20 split: the earliest rows train the model and the most
# recent rows are held out, so no shuffling mixes future data into training.
split_idx = int(len(df) * 0.8)
X_train = X.iloc[:split_idx]
X_test = X.iloc[split_idx:]
y_train = y.iloc[:split_idx]
y_test = y.iloc[split_idx:]

# Fit a 100-tree random forest (depth capped at 10, fixed seed)
rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
rf.fit(X_train, y_train)

# Predicted direction for every held-out row
y_pred = rf.predict(X_test)

# Out-of-sample accuracy and per-class precision/recall
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2%}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Rank the features by the forest's importance scores, highest first
importance = pd.DataFrame(
    {'Feature': feature_cols, 'Importance': rf.feature_importances_}
)
importance = importance.sort_values('Importance', ascending=False)
print("\nFeature Importance:")
print(importance)
Backtest ML Strategy
Python
# Turn the held-out predictions into positions: 1 -> long (+1), 0 -> short (-1)
df_test = df.iloc[split_idx:].copy()
df_test['Prediction'] = y_pred
direction_to_position = {1: 1, 0: -1}
df_test['Signal'] = df_test['Prediction'].map(direction_to_position)

# A signal produced on one bar is applied to the following bar's return,
# hence the one-row shift before multiplying.
lagged_signal = df_test['Signal'].shift(1)
df_test['Strategy_Returns'] = lagged_signal * df_test['Returns']

# Compounded total return and annualized Sharpe ratio (252 trading days)
strategy_returns = df_test['Strategy_Returns']
growth = (1 + strategy_returns).prod()
total_return = growth - 1
sharpe = strategy_returns.mean() / strategy_returns.std() * (252**0.5)

print(f"ML Strategy Return: {total_return*100:.2f}%")
print(f"Sharpe Ratio: {sharpe:.2f}")
print(f"Accuracy: {accuracy:.2%}")
Common ML Models for Trading
- Random Forest: Robust, handles non-linear relationships
- Gradient Boosting: High accuracy, feature importance
- SVM: Good for classification, kernel tricks