PipsGrowth

pandas لتحليل البيانات

مكتبة Python الأساسية لمعالجة وتحليل البيانات المالية. مثالية للتعامل مع السلاسل الزمنية وبيانات OHLCV وحسابات المحافظ.

الصعوبة: مبتدئ
الفئة: بيانات وتحليل
⭐ الأكثر شعبية

التثبيت

Install pandas using pip. It's recommended to also install NumPy and Matplotlib for full functionality.

# Install pandas
$ pip install pandas
# Install with common trading libraries
$ pip install pandas numpy matplotlib yfinance

الميزات الرئيسية

DataFrame سريع

معالجة بيانات عالية الأداء مع فهرسة زمنية مدمجة.

بيانات OHLCV

دعم أصلي لبيانات الأسعار مع إعادة تحديد الإطار الزمني.

تكامل مع المؤشرات

يعمل مباشرة مع TA-Lib و pandas-ta وغيرها.

تصدير مرن

تصدير إلى CSV و Excel و JSON و SQL والمزيد.

أمثلة الكود

Load and Clean OHLCV Data

Download and prepare forex data for analysis

Python
import pandas as pd
import yfinance as yf

# Download daily EURUSD candles for calendar year 2023.
df = yf.download('EURUSD=X', start='2023-01-01', end='2024-01-01', interval='1d')
# Recent yfinance versions return MultiIndex columns (field, ticker);
# flatten them so the examples below can use plain column names.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)
# Clean data
df = df.dropna()  # Remove missing values
# FIX: Yahoo reports Volume as 0 on every forex bar, so the original
# unconditional filter `df = df[df['Volume'] > 0]` removed ALL rows.
# Only strip zero-volume bars when volume data actually exists.
if (df['Volume'] > 0).any():
    df = df[df['Volume'] > 0]
# Add calendar features derived from the DatetimeIndex.
df['Year'] = df.index.year
df['Month'] = df.index.month
df['DayOfWeek'] = df.index.dayofweek
print(df.head())
print(f"Total rows: {len(df)}")

Calculate Moving Averages

Implement SMA crossover strategy

Python
# Fast (50) and slow (200) simple moving averages of the close.
close = df['Close']
df['SMA_50'] = close.rolling(window=50).mean()
df['SMA_200'] = close.rolling(window=200).mean()
# Signal: +1 while the fast SMA sits above the slow one, -1 below, 0 otherwise.
df['Signal'] = 0
fast_above = df['SMA_50'] > df['SMA_200']
fast_below = df['SMA_50'] < df['SMA_200']
df.loc[fast_above, 'Signal'] = 1   # long bias
df.loc[fast_below, 'Signal'] = -1  # short bias
# A jump of +2 / -2 in the signal marks the exact bar where the SMAs crossed.
df['Position'] = df['Signal'].diff()
df.loc[df['Position'] == 2, 'Crossover'] = 'Golden Cross'
df.loc[df['Position'] == -2, 'Crossover'] = 'Death Cross'
# Show only the crossover bars.
signals = df[df['Crossover'].notna()][['Close', 'SMA_50', 'SMA_200', 'Crossover']]
print(signals)

Calculate RSI Indicator

Relative Strength Index from scratch

Python
def calculate_rsi(data, period=14, column='Close'):
    """Compute the Relative Strength Index (RSI) of a price column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the price column named by ``column``.
    period : int, default 14
        Look-back window for the average gain/loss.
    column : str, default 'Close'
        Price column to use (generalizes the original hard-coded 'Close').

    Returns
    -------
    pandas.Series
        RSI values in [0, 100]; the first ``period - 1`` entries are NaN.

    Notes
    -----
    Uses a plain rolling mean for the average gain/loss. Wilder's original
    RSI uses an exponentially smoothed average, so values differ slightly
    from platforms that implement Wilder smoothing.
    """
    delta = data[column].diff()
    # Split moves into gains and losses (losses kept as positive numbers).
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=period).mean()
    avg_loss = loss.rolling(window=period).mean()
    # avg_loss == 0 yields rs = inf and RSI = 100, the intended limit.
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi
# Attach the 14-period RSI to the working DataFrame.
df['RSI'] = calculate_rsi(df)
# Label each bar by RSI zone: above 70 = overbought, below 30 = oversold.
df['RSI_Signal'] = 'Neutral'
overbought = df['RSI'] > 70
oversold = df['RSI'] < 30
df.loc[overbought, 'RSI_Signal'] = 'Overbought'
df.loc[oversold, 'RSI_Signal'] = 'Oversold'
print(df[['Close', 'RSI', 'RSI_Signal']].tail(10))

Resample to Different Timeframes

Convert 1-hour data to 4-hour or daily

Python
# Download 60 days of hourly GBPUSD bars.
df_1h = yf.download('GBPUSD=X', period='60d', interval='1h')

# One shared OHLCV aggregation spec (the original duplicated this literal).
OHLCV_AGG = {
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume': 'sum',
}
# Resample to 4-hour bars. Lowercase 'h': the uppercase 'H' offset alias
# is deprecated since pandas 2.2.
df_4h = df_1h.resample('4h').agg(OHLCV_AGG)
# Resample to daily bars.
df_daily = df_1h.resample('D').agg(OHLCV_AGG)
print(f"1H bars: {len(df_1h)}")
print(f"4H bars: {len(df_4h)}")
print(f"Daily bars: {len(df_daily)}")

Vectorized Backtesting

Fast backtesting using pandas vectorization

Python
def backtest_strategy(df, initial_capital=10000):
    """Vectorized backtest of the precomputed 'Signal' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Close' and 'Signal' (+1 / 0 / -1) columns.
        NOTE: mutated in place — result columns are added to it.
    initial_capital : int, default 10000
        Kept for interface compatibility; not used by the current
        return-based calculation.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` with Returns / Strategy_Returns / cumulative columns.
    """
    df['Returns'] = df['Close'].pct_change()
    # Shift the signal by one bar so today's return is traded on
    # yesterday's signal (avoids look-ahead bias).
    df['Strategy_Returns'] = df['Signal'].shift(1) * df['Returns']
    # Cumulative growth factors for buy-and-hold vs. the strategy.
    df['Cumulative_Market'] = (1 + df['Returns']).cumprod()
    df['Cumulative_Strategy'] = (1 + df['Strategy_Returns']).cumprod()
    total_return = (df['Cumulative_Strategy'].iloc[-1] - 1) * 100
    market_return = (df['Cumulative_Market'].iloc[-1] - 1) * 100
    # Annualized Sharpe (252 trading days). FIX: guard zero/NaN volatility,
    # which previously produced inf/NaN for flat strategies.
    std = df['Strategy_Returns'].std()
    sharpe_ratio = (df['Strategy_Returns'].mean() / std) * (252 ** 0.5) if std > 0 else 0.0
    # Win rate over active bars. FIX: guard the no-trade denominator.
    winning_trades = df[df['Strategy_Returns'] > 0]
    active = df[df['Strategy_Returns'] != 0]
    win_rate = len(winning_trades) / len(active) * 100 if len(active) else 0.0
    print(f"Strategy Return: {total_return:.2f}%")
    print(f"Market Return: {market_return:.2f}%")
    print(f"Sharpe Ratio: {sharpe_ratio:.2f}")
    print(f"Win Rate: {win_rate:.2f}%")
    return df
# Run the vectorized backtest on the DataFrame whose 'Signal' column was built above.
results = backtest_strategy(df)

Bollinger Bands Strategy

Mean reversion using Bollinger Bands

Python
# Bollinger Bands: 20-bar SMA plus/minus two standard deviations.
rolling_close = df['Close'].rolling(window=20)
df['SMA_20'] = rolling_close.mean()
df['STD_20'] = rolling_close.std()
band_offset = df['STD_20'] * 2
df['Upper_Band'] = df['SMA_20'] + band_offset
df['Lower_Band'] = df['SMA_20'] - band_offset
# Mean-reversion signals: buy a close below the lower band,
# sell a close above the upper band.
df['BB_Signal'] = 0
df.loc[df['Close'] < df['Lower_Band'], 'BB_Signal'] = 1
df.loc[df['Close'] > df['Upper_Band'], 'BB_Signal'] = -1
# Bandwidth relative to the SMA acts as a volatility gauge.
df['BB_Width'] = (df['Upper_Band'] - df['Lower_Band']) / df['SMA_20']
# Squeeze: bandwidth under half its own 50-bar average (low volatility).
df['Squeeze'] = df['BB_Width'] < df['BB_Width'].rolling(50).mean() * 0.5
print(df[['Close', 'Upper_Band', 'Lower_Band', 'BB_Signal', 'Squeeze']].tail())

Multi-Timeframe Analysis

Combine signals from multiple timeframes

Python
# Get data for different timeframes
df_1h = yf.download('EURUSD=X', period='90d', interval='1h')
df_4h = df_1h.resample('4H').agg({
'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum'
})
df_daily = df_1h.resample('D').agg({
'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum'
})
# Calculate trend for each timeframe
for df_tf, name in [(df_1h, '1H'), (df_4h, '4H'), (df_daily, 'Daily')]:
df_tf['SMA_50'] = df_tf['Close'].rolling(50).mean()
df_tf['SMA_200'] = df_tf['Close'].rolling(200).mean()
df_tf[f'Trend_{name}'] = 'Neutral'
df_tf.loc[df_tf['SMA_50'] > df_tf['SMA_200'], f'Trend_{name}'] = 'Bullish'
df_tf.loc[df_tf['SMA_50'] < df_tf['SMA_200'], f'Trend_{name}'] = 'Bearish'
# Merge trends to 1H timeframe
df_1h = df_1h.join(df_4h[['Trend_4H']], how='left').ffill()
df_1h = df_1h.join(df_daily[['Trend_Daily']], how='left').ffill()
# Only trade when all timeframes align
df_1h['All_Bullish'] = (df_1h['Trend_1H'] == 'Bullish') & \
(df_1h['Trend_4H'] == 'Bullish') & \
(df_1h['Trend_Daily'] == 'Bullish')
print(df_1h[['Close', 'Trend_1H', 'Trend_4H', 'Trend_Daily', 'All_Bullish']].tail())

Calculate Performance Metrics

Comprehensive strategy performance analysis

Python
import numpy as np
def calculate_metrics(df):
    """Compute and print performance metrics for a backtested DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns produced by the backtest step:
        'Strategy_Returns' (per-bar strategy returns) and
        'Cumulative_Strategy' (cumulative growth factor).

    Returns
    -------
    dict
        Formatted metric strings keyed by metric name, plus the integer
        'Total Trades' count. Each entry is also printed.
    """
    returns = df['Strategy_Returns'].dropna()
    trades = returns[returns != 0]  # bars where the strategy was active
    # Return metrics (annualized assuming 252 trading days per year).
    total_return = (df['Cumulative_Strategy'].iloc[-1] - 1) * 100
    annual_return = ((df['Cumulative_Strategy'].iloc[-1]) ** (252 / len(df)) - 1) * 100
    # Risk metrics. FIX: guard zero/NaN volatility, which previously made
    # the Sharpe ratio inf/NaN for flat strategies.
    std = returns.std()
    volatility = std * np.sqrt(252) * 100
    sharpe = (returns.mean() / std) * np.sqrt(252) if std > 0 else 0.0
    # Maximum drawdown measured from the running equity peak.
    cumulative = df['Cumulative_Strategy']
    running_max = cumulative.expanding().max()
    drawdown = (cumulative - running_max) / running_max * 100
    max_drawdown = drawdown.min()
    # Trade statistics. FIX: guard the no-trade case, which previously
    # raised ZeroDivisionError in the win-rate calculation.
    wins = returns[returns > 0]
    losses = returns[returns < 0]
    win_rate = len(wins) / len(trades) * 100 if len(trades) else 0.0
    # Profit factor: gross profit over gross loss (0 when no losses).
    gross_profit = wins.sum()
    gross_loss = abs(losses.sum())
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else 0
    # Average winning / losing bar, as percentages.
    avg_win = wins.mean() * 100 if len(wins) > 0 else 0
    avg_loss = losses.mean() * 100 if len(losses) > 0 else 0
    metrics = {
        'Total Return': f'{total_return:.2f}%',
        'Annual Return': f'{annual_return:.2f}%',
        'Volatility': f'{volatility:.2f}%',
        'Sharpe Ratio': f'{sharpe:.2f}',
        'Max Drawdown': f'{max_drawdown:.2f}%',
        'Win Rate': f'{win_rate:.2f}%',
        'Profit Factor': f'{profit_factor:.2f}',
        'Avg Win': f'{avg_win:.2f}%',
        'Avg Loss': f'{avg_loss:.2f}%',
        'Total Trades': len(trades)
    }
    for key, value in metrics.items():
        print(f'{key}: {value}')
    return metrics
# Summarize the backtest results produced by backtest_strategy above.
metrics = calculate_metrics(df)

حالات الاستخدام الشائعة

تحليل السلاسل الزمنية
إعادة تحديد الإطار الزمني
تجميع البيانات
حسابات المحفظة
دمج مصادر البيانات
التقارير المالية
تحضير بيانات ML
التحليل الإحصائي
تصور البيانات
معالجة CSV

Best Practices & Common Pitfalls

استخدم DatetimeIndex

حوّل أعمدة التاريخ إلى DatetimeIndex للحصول على ميزات السلاسل الزمنية.

تجنب التكرار الحلقي

استخدم العمليات المتجهية بدلاً من حلقات for — أسرع بكثير.

تنظيف البيانات

تحقق دائماً من القيم المفقودة والمكررة قبل التحليل.

إدارة الذاكرة

استخدم dtypes مناسبة وعمليات chunked للمجموعات الكبيرة.

حذار من SettingWithCopy

استخدم .loc[] لتجنب التنبيهات والسلوك غير المتوقع.

نسخ البيانات

استخدم .copy() عند إنشاء DataFrames فرعية لتجنب التعديل غير المقصود.

موارد إضافية

Trading-Specific Resources

  • pandas-ta: Technical Analysis Extension
  • Vectorized Backtesting Tutorials
  • Financial Data Analysis Cookbook

الخطوات التالية

Now that you understand pandas for trading, explore these related libraries to build complete trading systems:

PipsGrowth - مراجعات خبراء الوسطاء، استراتيجيات التداول والأدوات