From 5fd8da528c010eba116f41dae258f1429a9f2085 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 17 Sep 2025 04:02:24 +0000 Subject: [PATCH] Refactor: Enhance README and add new strategy agents Update README with detailed structure, features, and tech stack. Add Momentum, Pairs Trading, Volatility, and other strategy agents. Improve backtesting engine and add visualization utilities. Co-authored-by: brandononchain --- README.md | 201 ++++- agents/momentum-agent.py | 417 +++++++++ agents/pairs-trading-agent.py | 338 +++++++ agents/volatility-agent.py | 457 ++++++++++ examples/complete_trading_system_example.py | 400 +++++++++ research/backtest-engine.py | 396 ++++++++- research/portfolio-manager.py | 568 ++++++++++++ research/strategy-optimizer.py | 733 ++++++++++++++++ utils/data-loader.py | 694 +++++++++++++++ utils/risk-analytics.py | 673 ++++++++++++++ utils/visualization.py | 923 ++++++++++++++++++++ 11 files changed, 5747 insertions(+), 53 deletions(-) create mode 100644 agents/momentum-agent.py create mode 100644 agents/pairs-trading-agent.py create mode 100644 agents/volatility-agent.py create mode 100644 examples/complete_trading_system_example.py create mode 100644 research/portfolio-manager.py create mode 100644 research/strategy-optimizer.py create mode 100644 utils/data-loader.py create mode 100644 utils/risk-analytics.py create mode 100644 utils/visualization.py diff --git a/README.md b/README.md index cc73c58..aa35316 100644 --- a/README.md +++ b/README.md @@ -1,67 +1,194 @@ -# ๐Ÿง  AI Algorithms +# ๐Ÿง  AI Algorithms - Advanced Trading System -A curated collection of AI-first trading and analysis tools, agents, and algorithmic logic. Built to explore the intersection of markets, machine learning, automation, and alpha. +A comprehensive collection of AI-first trading algorithms, backtesting infrastructure, and quantitative analysis tools. Built to explore the intersection of markets, machine learning, automation, and alpha generation. 
## โš™๏ธ Overview -This repo is a live R\&D space for building and experimenting with AI-native trading algorithms. It includes: +This repository is a complete trading system development platform featuring: + +* **Advanced Trading Strategies**: Momentum, mean reversion, pairs trading, volatility strategies, and statistical arbitrage +* **Comprehensive Backtesting Engine**: Transaction costs, slippage, position sizing, and realistic market simulation +* **Portfolio Management**: Multi-strategy allocation, risk budgeting, and correlation management +* **Strategy Optimization**: Grid search, Bayesian optimization, walk-forward analysis, and overfitting detection +* **Risk Analytics**: VaR, stress testing, factor analysis, and tail risk measurement +* **Professional Visualization**: Interactive charts, performance dashboards, and risk analysis plots +* **Data Management**: Multi-source data loading, preprocessing, and feature engineering + +## ๐Ÿš€ Key Features + +### Trading Strategies +- **Momentum Agent**: Multi-timeframe momentum with RSI, MACD, and volume confirmation +- **Mean Reversion Agent**: Z-score based mean reversion with dynamic thresholds +- **Pairs Trading Agent**: Cointegration-based statistical arbitrage +- **Volatility Agents**: Breakout, mean reversion, and VIX-based strategies +- **Statistical Arbitrage**: Cross-sectional ranking and factor-based strategies + +### Backtesting Infrastructure +- **Enhanced Backtester**: Realistic simulation with transaction costs and slippage +- **Portfolio Manager**: Multi-strategy allocation with risk budgeting +- **Strategy Comparator**: Side-by-side performance analysis +- **Walk-Forward Analysis**: Out-of-sample validation and robustness testing + +### Risk Management +- **Comprehensive Risk Metrics**: Sharpe, Sortino, Calmar ratios and more +- **Value at Risk (VaR)**: Historical, parametric, and Monte Carlo methods +- **Stress Testing**: Scenario analysis and tail risk measurement +- **Factor Analysis**: 
Performance attribution and systematic risk decomposition + +### Optimization & Analysis +- **Parameter Optimization**: Grid search, random search, Bayesian optimization, and Optuna +- **Walk-Forward Analysis**: Time-series cross-validation for robust parameter selection +- **Overfitting Detection**: Statistical tests and consistency metrics +- **Monte Carlo Simulation**: Risk scenario generation and stress testing + +## ๐Ÿ“ Enhanced Structure -* Quant strategies (rule-based and learning-based) -* AI agent logic for automation and signal generation -* Tools for market structure analysis -* Experimental scripts and notebooks for futures, forex, crypto, and equities +```bash +AI-Algorithms/ +โ”œโ”€โ”€ agents/ # Trading strategy agents +โ”‚ โ”œโ”€โ”€ base-agent.py # Abstract base class for all strategies +โ”‚ โ”œโ”€โ”€ momentum-agent.py # Multi-timeframe momentum strategies +โ”‚ โ”œโ”€โ”€ mean-reversion-agent.py # Mean reversion and statistical arbitrage +โ”‚ โ”œโ”€โ”€ pairs-trading-agent.py # Cointegration-based pairs trading +โ”‚ โ”œโ”€โ”€ volatility-agent.py # Volatility breakout and mean reversion +โ”‚ โ””โ”€โ”€ ... +โ”œโ”€โ”€ research/ # Advanced research and backtesting +โ”‚ โ”œโ”€โ”€ backtest-engine.py # Enhanced backtesting with realistic costs +โ”‚ โ”œโ”€โ”€ portfolio-manager.py # Multi-strategy portfolio management +โ”‚ โ”œโ”€โ”€ strategy-optimizer.py # Parameter optimization and walk-forward +โ”‚ โ””โ”€โ”€ ... 
+โ”œโ”€โ”€ utils/ # Core utilities and analytics +โ”‚ โ”œโ”€โ”€ data-loader.py # Multi-source data loading and preprocessing +โ”‚ โ”œโ”€โ”€ risk-analytics.py # Comprehensive risk measurement +โ”‚ โ”œโ”€โ”€ visualization.py # Professional charting and dashboards +โ”‚ โ”œโ”€โ”€ performance.py # Performance metrics calculation +โ”‚ โ””โ”€โ”€ ml-utils.py # Machine learning utilities +โ”œโ”€โ”€ indicators/ # Technical indicators +โ”‚ โ”œโ”€โ”€ *.py # Python implementations +โ”‚ โ””โ”€โ”€ pinescript/ # TradingView Pine Script versions +โ”œโ”€โ”€ scripts/ # Standalone analysis scripts +โ”œโ”€โ”€ examples/ # Complete system demonstrations +โ”‚ โ””โ”€โ”€ complete_trading_system_example.py +โ””โ”€โ”€ README.md +``` -> โš ๏ธ **Note:** This is a sandbox project for research and prototyping. Use at your own risk. +## ๐Ÿงฐ Advanced Tech Stack +* **Core**: Python 3.8+, Pandas, NumPy, SciPy +* **Machine Learning**: Scikit-learn, Optuna, Bayesian optimization +* **Visualization**: Plotly, Matplotlib, Seaborn (interactive dashboards) +* **Data Sources**: yfinance, Alpha Vantage, Twelve Data, Quandl +* **Storage**: SQLite for caching, pickle for model persistence +* **Optimization**: Multi-processing, parallel backtesting +* **Risk Analytics**: Advanced statistical measures, factor models -## ๐Ÿšง Work in Progress +## ๐Ÿš€ Quick Start -This repo evolves continuously. Some code may be experimental, partially functional, or intentionally left incomplete for testing or prompt engineering purposes. +### 1. Installation +```bash +git clone https://github.com/yourusername/AI-Algorithms.git +cd AI-Algorithms +pip install -r requirements.txt # Create this with your dependencies +``` -If youโ€™re looking for: +### 2. Run Complete Example +```python +from examples.complete_trading_system_example import main -* Fully integrated bots or automated trading flows โ†’ check my n8n workflows or reach out. -* High-performance, production-ready systems โ†’ coming soon in Quantra Labโ€™s private repo. 
+# Run full system demonstration +results = main() +``` +### 3. Individual Components +```python +# Load data +from utils.data_loader import DataLoader, DataConfig +loader = DataLoader(DataConfig(add_technical_indicators=True)) +data = loader.get_data('AAPL') + +# Create strategy +from agents.momentum_agent import MomentumAgent +strategy = MomentumAgent({'fast_period': 10, 'slow_period': 30}) +signals = strategy.generate_detailed_signals(data) + +# Backtest +from research.backtest_engine import EnhancedBacktester, BacktestConfig +backtester = EnhancedBacktester(data, BacktestConfig()) +results = backtester.backtest_strategy(signals['signal']) + +# Visualize +from utils.visualization import TradingVisualizer +viz = TradingVisualizer() +fig = viz.plot_performance_dashboard(results) +fig.show() +``` -## ๐Ÿ“ Structure +## ๐Ÿ“Š Performance Analytics -```bash -AI-Algorithms/ -โ”œโ”€โ”€ agent/ # AI agent logic & inference -โ”œโ”€โ”€ scripts/ # Standalone scripts for signal generation, data prep, etc. -โ”œโ”€โ”€ indicators/ # Custom indicator logic (TradingView-style or Python-based) -โ”œโ”€โ”€ research/ # Jupyter notebooks, JSON, and research templates -โ”œโ”€โ”€ utils/ # Helpers for data handling, prompts, logging, etc. 
-โ”œโ”€โ”€ .env.example # Environment variable sample -โ””โ”€โ”€ README.md # You are here -``` +The system provides institutional-grade performance analytics: + +- **Return Metrics**: Total return, CAGR, volatility, Sharpe ratio +- **Risk Metrics**: Maximum drawdown, VaR, CVaR, tail ratios +- **Trade Analytics**: Win rate, profit factor, average win/loss +- **Factor Analysis**: Alpha, beta, systematic vs idiosyncratic risk +- **Portfolio Metrics**: Diversification ratio, risk contribution +## ๐ŸŽฏ Strategy Optimization -## ๐Ÿงฐ Tech Stack +Advanced optimization capabilities: -* Python (Pandas, NumPy, Scikit-learn, TA-Lib) -* OpenAI API & Langchain (for intelligent agents) -* TradingView-compatible indicators & signals -* Jupyter, JSON, YAML for workflows and prompts -* Integration-ready with n8n, MT5/MT4, ByBit, TwelveData, and more +- **Multiple Methods**: Grid search, random search, Bayesian optimization +- **Walk-Forward Analysis**: Time-series cross-validation +- **Overfitting Detection**: Statistical significance testing +- **Parallel Processing**: Multi-core optimization +- **Constraint Handling**: Parameter bounds and relationships +## ๐Ÿ“ˆ Visualization Suite + +Professional-grade visualization tools: + +- **Interactive Dashboards**: Plotly-based performance analytics +- **Risk Visualizations**: Drawdown plots, correlation heatmaps +- **Strategy Comparison**: Side-by-side performance analysis +- **Factor Analysis**: Risk attribution and factor loadings +- **Portfolio Analytics**: Allocation evolution and contribution analysis + +## ๐Ÿ”ฌ Research Applications + +This system is designed for: + +- **Strategy Development**: Rapid prototyping and testing of trading ideas +- **Academic Research**: Quantitative finance and algorithmic trading studies +- **Risk Management**: Portfolio risk assessment and scenario analysis +- **Performance Attribution**: Understanding strategy and factor contributions +- **Market Microstructure**: Analysis of trading costs and 
market impact + +## โš ๏ธ Important Disclaimers + +- **Research Purpose**: This system is designed for research and educational purposes +- **Risk Warning**: Trading involves substantial risk of loss +- **No Guarantees**: Past performance does not guarantee future results +- **Professional Advice**: Consult qualified professionals before making investment decisions ## ๐Ÿ”ฎ Vision > Build the future of trading with AI-first tools, not lagging indicators. -> Alpha isnโ€™t found โ€” itโ€™s engineered. - +> Alpha isn't found โ€” it's engineered through rigorous research and systematic testing. ## ๐Ÿ› ๏ธ Contributing -This is a personal playground, but if you're building something similar or want to collaborate: - -* Open an issue or PR -* Drop a DM on Twitter: [@brandononchain](https://twitter.com/brandononchain) +This is an evolving research platform. Contributions welcome: +* Open an issue for bugs or feature requests +* Submit PRs for enhancements +* Share research findings and strategy improvements +* Contact: [@brandononchain](https://twitter.com/brandononchain) ## ๐Ÿ“„ License -MIT โ€” feel free to fork, build, or adapt. Attribution appreciated. +MIT License โ€” feel free to fork, build, or adapt. Attribution appreciated. + +--- + +*Built with โค๏ธ for the quantitative trading community* diff --git a/agents/momentum-agent.py b/agents/momentum-agent.py new file mode 100644 index 0000000..6e19e0b --- /dev/null +++ b/agents/momentum-agent.py @@ -0,0 +1,417 @@ +""" +Momentum Trading Agent + +Multi-timeframe momentum strategy that captures trending moves +using various momentum indicators and filters. 
+""" + +import pandas as pd +import numpy as np +from agents.base_agent import BaseAgent +from typing import Dict, List, Optional, Tuple +import talib + + +class MomentumAgent(BaseAgent): + """ + Momentum trading strategy using multiple timeframes and indicators: + - Price momentum (rate of change) + - RSI momentum + - MACD momentum + - Volume confirmation + - Trend strength filters + """ + + def __init__(self, config: dict = None): + super().__init__(config) + + # Momentum parameters + self.fast_period = self.config.get("fast_period", 10) + self.slow_period = self.config.get("slow_period", 20) + self.momentum_threshold = self.config.get("momentum_threshold", 0.02) + + # RSI parameters + self.rsi_period = self.config.get("rsi_period", 14) + self.rsi_overbought = self.config.get("rsi_overbought", 70) + self.rsi_oversold = self.config.get("rsi_oversold", 30) + + # MACD parameters + self.macd_fast = self.config.get("macd_fast", 12) + self.macd_slow = self.config.get("macd_slow", 26) + self.macd_signal = self.config.get("macd_signal", 9) + + # Volume parameters + self.volume_ma_period = self.config.get("volume_ma_period", 20) + self.volume_threshold = self.config.get("volume_threshold", 1.2) + + # Risk management + self.min_trend_strength = self.config.get("min_trend_strength", 0.5) + self.max_volatility = self.config.get("max_volatility", 0.05) + + def calculate_price_momentum(self, prices: pd.Series) -> pd.Series: + """Calculate price momentum (rate of change)""" + return prices.pct_change(self.fast_period) + + def calculate_momentum_strength(self, prices: pd.Series) -> pd.Series: + """Calculate momentum strength using multiple periods""" + mom_fast = prices.pct_change(self.fast_period) + mom_slow = prices.pct_change(self.slow_period) + + # Momentum strength is the ratio of fast to slow momentum + momentum_strength = mom_fast / (mom_slow + 1e-8) # Add small value to avoid division by zero + return momentum_strength + + def calculate_rsi_momentum(self, prices: 
pd.Series) -> pd.Series: + """Calculate RSI-based momentum signals""" + try: + rsi = talib.RSI(prices.values, timeperiod=self.rsi_period) + rsi_series = pd.Series(rsi, index=prices.index) + + # RSI momentum: positive when RSI is rising and above 50 + rsi_change = rsi_series.diff() + rsi_momentum = np.where( + (rsi_series > 50) & (rsi_change > 0), 1, + np.where((rsi_series < 50) & (rsi_change < 0), -1, 0) + ) + + return pd.Series(rsi_momentum, index=prices.index) + except: + # Fallback manual RSI calculation + return self._manual_rsi_momentum(prices) + + def _manual_rsi_momentum(self, prices: pd.Series) -> pd.Series: + """Manual RSI calculation as fallback""" + delta = prices.diff() + gain = (delta.where(delta > 0, 0)).rolling(window=self.rsi_period).mean() + loss = (-delta.where(delta < 0, 0)).rolling(window=self.rsi_period).mean() + + rs = gain / loss + rsi = 100 - (100 / (1 + rs)) + + rsi_change = rsi.diff() + rsi_momentum = np.where( + (rsi > 50) & (rsi_change > 0), 1, + np.where((rsi < 50) & (rsi_change < 0), -1, 0) + ) + + return pd.Series(rsi_momentum, index=prices.index) + + def calculate_macd_momentum(self, prices: pd.Series) -> pd.Series: + """Calculate MACD-based momentum""" + try: + macd, macd_signal, macd_hist = talib.MACD( + prices.values, + fastperiod=self.macd_fast, + slowperiod=self.macd_slow, + signalperiod=self.macd_signal + ) + + macd_series = pd.Series(macd, index=prices.index) + signal_series = pd.Series(macd_signal, index=prices.index) + + # MACD momentum: positive when MACD > signal and both rising + macd_momentum = np.where( + (macd_series > signal_series) & (macd_series.diff() > 0), 1, + np.where((macd_series < signal_series) & (macd_series.diff() < 0), -1, 0) + ) + + return pd.Series(macd_momentum, index=prices.index) + except: + return self._manual_macd_momentum(prices) + + def _manual_macd_momentum(self, prices: pd.Series) -> pd.Series: + """Manual MACD calculation as fallback""" + ema_fast = prices.ewm(span=self.macd_fast).mean() + 
ema_slow = prices.ewm(span=self.macd_slow).mean() + macd = ema_fast - ema_slow + signal = macd.ewm(span=self.macd_signal).mean() + + macd_momentum = np.where( + (macd > signal) & (macd.diff() > 0), 1, + np.where((macd < signal) & (macd.diff() < 0), -1, 0) + ) + + return pd.Series(macd_momentum, index=prices.index) + + def calculate_volume_confirmation(self, market_data: pd.DataFrame) -> pd.Series: + """Calculate volume-based confirmation""" + if 'volume' not in market_data.columns: + return pd.Series(1, index=market_data.index) # No volume data + + volume = market_data['volume'] + volume_ma = volume.rolling(self.volume_ma_period).mean() + + # Volume confirmation: 1 if above average, 0 otherwise + volume_conf = (volume > volume_ma * self.volume_threshold).astype(int) + return volume_conf + + def calculate_trend_strength(self, prices: pd.Series) -> pd.Series: + """Calculate trend strength using ADX-like measure""" + high = prices # Simplified - using close as high + low = prices # Simplified - using close as low + close = prices + + # Calculate True Range + tr1 = high - low + tr2 = abs(high - close.shift(1)) + tr3 = abs(low - close.shift(1)) + true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1) + + # Calculate Directional Movement + dm_plus = np.where((high - high.shift(1)) > (low.shift(1) - low), + np.maximum(high - high.shift(1), 0), 0) + dm_minus = np.where((low.shift(1) - low) > (high - high.shift(1)), + np.maximum(low.shift(1) - low, 0), 0) + + dm_plus = pd.Series(dm_plus, index=prices.index) + dm_minus = pd.Series(dm_minus, index=prices.index) + + # Smooth the values + period = 14 + tr_smooth = true_range.rolling(period).mean() + dm_plus_smooth = dm_plus.rolling(period).mean() + dm_minus_smooth = dm_minus.rolling(period).mean() + + # Calculate DI+ and DI- + di_plus = 100 * dm_plus_smooth / tr_smooth + di_minus = 100 * dm_minus_smooth / tr_smooth + + # Calculate DX and ADX (trend strength) + dx = 100 * abs(di_plus - di_minus) / (di_plus + di_minus + 
1e-8) + adx = dx.rolling(period).mean() + + return adx / 100 # Normalize to 0-1 range + + def calculate_volatility_filter(self, prices: pd.Series) -> pd.Series: + """Calculate volatility filter to avoid trading in high volatility periods""" + returns = prices.pct_change() + volatility = returns.rolling(20).std() + + # Filter: 1 if volatility is acceptable, 0 otherwise + vol_filter = (volatility < self.max_volatility).astype(int) + return vol_filter + + def generate_signal(self, market_data: pd.DataFrame) -> str: + """Generate momentum trading signal""" + prices = market_data['close'] + + if len(prices) < max(self.slow_period, self.rsi_period, 30): + return 'HOLD' + + # Calculate all momentum indicators + price_momentum = self.calculate_price_momentum(prices) + momentum_strength = self.calculate_momentum_strength(prices) + rsi_momentum = self.calculate_rsi_momentum(prices) + macd_momentum = self.calculate_macd_momentum(prices) + volume_conf = self.calculate_volume_confirmation(market_data) + trend_strength = self.calculate_trend_strength(prices) + vol_filter = self.calculate_volatility_filter(prices) + + # Get latest values + current_price_mom = price_momentum.iloc[-1] + current_mom_strength = momentum_strength.iloc[-1] + current_rsi_mom = rsi_momentum.iloc[-1] + current_macd_mom = macd_momentum.iloc[-1] + current_volume_conf = volume_conf.iloc[-1] + current_trend_strength = trend_strength.iloc[-1] + current_vol_filter = vol_filter.iloc[-1] + + # Skip if conditions are not met + if (current_vol_filter == 0 or + current_trend_strength < self.min_trend_strength or + current_volume_conf == 0): + return 'HOLD' + + # Combine momentum signals + momentum_score = 0 + + # Price momentum (strongest weight) + if abs(current_price_mom) > self.momentum_threshold: + momentum_score += 3 * np.sign(current_price_mom) + + # Momentum strength + if abs(current_mom_strength) > 1.2: + momentum_score += 2 * np.sign(current_mom_strength) + + # Technical momentum indicators + momentum_score 
+= current_rsi_mom + momentum_score += current_macd_mom + + # Weight by trend strength + momentum_score *= current_trend_strength + + # Generate final signal + if momentum_score > 2: + return 'BUY' + elif momentum_score < -2: + return 'SELL' + else: + return 'HOLD' + + def generate_detailed_signals(self, market_data: pd.DataFrame) -> pd.DataFrame: + """Generate detailed momentum signals with all indicators""" + prices = market_data['close'] + + # Calculate all indicators + price_momentum = self.calculate_price_momentum(prices) + momentum_strength = self.calculate_momentum_strength(prices) + rsi_momentum = self.calculate_rsi_momentum(prices) + macd_momentum = self.calculate_macd_momentum(prices) + volume_conf = self.calculate_volume_confirmation(market_data) + trend_strength = self.calculate_trend_strength(prices) + vol_filter = self.calculate_volatility_filter(prices) + + # Combine into signals + momentum_scores = [] + signals = [] + + for i in range(len(prices)): + if i < max(self.slow_period, self.rsi_period, 30): + momentum_scores.append(0) + signals.append(0) + continue + + # Get current values + price_mom = price_momentum.iloc[i] + mom_strength = momentum_strength.iloc[i] + rsi_mom = rsi_momentum.iloc[i] + macd_mom = macd_momentum.iloc[i] + vol_conf = volume_conf.iloc[i] + trend_str = trend_strength.iloc[i] + vol_filt = vol_filter.iloc[i] + + # Skip if conditions are not met + if (vol_filt == 0 or trend_str < self.min_trend_strength or vol_conf == 0): + momentum_scores.append(0) + signals.append(0) + continue + + # Calculate momentum score + momentum_score = 0 + + if abs(price_mom) > self.momentum_threshold: + momentum_score += 3 * np.sign(price_mom) + + if abs(mom_strength) > 1.2: + momentum_score += 2 * np.sign(mom_strength) + + momentum_score += rsi_mom + macd_mom + momentum_score *= trend_str + + momentum_scores.append(momentum_score) + + # Generate signal + if momentum_score > 2: + signals.append(1) + elif momentum_score < -2: + signals.append(-1) + else: 
+ signals.append(0) + + # Create results DataFrame + results = pd.DataFrame({ + 'price': prices, + 'price_momentum': price_momentum, + 'momentum_strength': momentum_strength, + 'rsi_momentum': rsi_momentum, + 'macd_momentum': macd_momentum, + 'volume_confirmation': volume_conf, + 'trend_strength': trend_strength, + 'volatility_filter': vol_filter, + 'momentum_score': momentum_scores, + 'signal': signals + }, index=prices.index) + + return results + + +class VolatilityMomentumAgent(BaseAgent): + """ + Volatility-adjusted momentum strategy that scales position size + based on volatility and momentum strength. + """ + + def __init__(self, config: dict = None): + super().__init__(config) + self.lookback_period = self.config.get("lookback_period", 20) + self.momentum_threshold = self.config.get("momentum_threshold", 0.01) + self.vol_lookback = self.config.get("vol_lookback", 20) + self.target_volatility = self.config.get("target_volatility", 0.15) + + def calculate_volatility_adjusted_momentum(self, prices: pd.Series) -> Tuple[pd.Series, pd.Series]: + """Calculate momentum adjusted for volatility""" + returns = prices.pct_change() + + # Calculate rolling volatility + volatility = returns.rolling(self.vol_lookback).std() * np.sqrt(252) + + # Calculate momentum + momentum = prices.pct_change(self.lookback_period) + + # Volatility-adjusted momentum + vol_adj_momentum = momentum / (volatility / self.target_volatility) + + return vol_adj_momentum, volatility + + def generate_signal(self, market_data: pd.DataFrame) -> str: + """Generate volatility-adjusted momentum signal""" + prices = market_data['close'] + + if len(prices) < max(self.lookback_period, self.vol_lookback): + return 'HOLD' + + vol_adj_momentum, volatility = self.calculate_volatility_adjusted_momentum(prices) + + current_momentum = vol_adj_momentum.iloc[-1] + current_volatility = volatility.iloc[-1] + + # Avoid trading in extreme volatility conditions + if current_volatility > 2 * self.target_volatility: + 
return 'HOLD' + + # Generate signal based on volatility-adjusted momentum + if current_momentum > self.momentum_threshold: + return 'BUY' + elif current_momentum < -self.momentum_threshold: + return 'SELL' + else: + return 'HOLD' + + +# Example usage and testing +if __name__ == "__main__": + # Generate sample data with momentum patterns + np.random.seed(42) + dates = pd.date_range('2020-01-01', '2023-12-31', freq='D') + + # Create trending price data + trend = np.linspace(0, 2, len(dates)) # Upward trend + noise = np.random.randn(len(dates)) * 0.02 + momentum_shocks = np.random.randn(len(dates)) * 0.01 + momentum_shocks[::50] *= 5 # Add occasional momentum shocks + + log_prices = trend + np.cumsum(noise + momentum_shocks) + prices = 100 * np.exp(log_prices) + volumes = np.random.randint(1000, 10000, len(dates)) + + sample_data = pd.DataFrame({ + 'close': prices, + 'volume': volumes + }, index=dates) + + # Test momentum agent + momentum_agent = MomentumAgent({ + 'fast_period': 10, + 'slow_period': 20, + 'momentum_threshold': 0.02, + 'min_trend_strength': 0.3 + }) + + # Generate detailed signals + detailed_results = momentum_agent.generate_detailed_signals(sample_data) + + print("Momentum Strategy Results:") + print(f"Total signals: {(detailed_results['signal'] != 0).sum()}") + print(f"Buy signals: {(detailed_results['signal'] == 1).sum()}") + print(f"Sell signals: {(detailed_results['signal'] == -1).sum()}") + print(f"Average momentum score: {detailed_results['momentum_score'].mean():.3f}") \ No newline at end of file diff --git a/agents/pairs-trading-agent.py b/agents/pairs-trading-agent.py new file mode 100644 index 0000000..27a4535 --- /dev/null +++ b/agents/pairs-trading-agent.py @@ -0,0 +1,338 @@ +""" +Pairs Trading Agent + +Statistical arbitrage strategy that trades on mean-reverting relationships +between correlated assets. Uses cointegration and z-score analysis. 
+""" + +import pandas as pd +import numpy as np +from scipy import stats +from statsmodels.tsa.stattools import cointeg +from agents.base_agent import BaseAgent +from typing import Tuple, Dict, Optional + + +class PairsTradingAgent(BaseAgent): + """ + Pairs trading strategy based on cointegration and mean reversion. + + Strategy: + 1. Identify cointegrated pairs + 2. Calculate z-score of spread + 3. Enter positions when z-score exceeds threshold + 4. Exit when z-score reverts to mean + """ + + def __init__(self, config: dict = None): + super().__init__(config) + self.lookback_window = self.config.get("lookback_window", 60) + self.entry_threshold = self.config.get("entry_threshold", 2.0) + self.exit_threshold = self.config.get("exit_threshold", 0.5) + self.stop_loss_threshold = self.config.get("stop_loss_threshold", 3.5) + self.min_half_life = self.config.get("min_half_life", 1) + self.max_half_life = self.config.get("max_half_life", 30) + + # Store pair relationship data + self.hedge_ratio = None + self.spread_mean = None + self.spread_std = None + self.current_position = 0 + + def calculate_cointegration(self, y1: pd.Series, y2: pd.Series) -> Tuple[float, float, float]: + """ + Test for cointegration between two price series. + + Returns: + - cointegration test statistic + - p-value + - hedge ratio (beta) + """ + # Perform Engle-Granger cointegration test + coint_result = cointeg(y1, y2) + test_stat = coint_result[0] + p_value = coint_result[1] + + # Calculate hedge ratio using OLS regression + X = np.column_stack([np.ones(len(y2)), y2]) + beta = np.linalg.lstsq(X, y1, rcond=None)[0] + hedge_ratio = beta[1] + + return test_stat, p_value, hedge_ratio + + def calculate_half_life(self, spread: pd.Series) -> float: + """ + Calculate the half-life of mean reversion for the spread. 
+ """ + spread_lag = spread.shift(1) + spread_diff = spread.diff() + + # Remove NaN values + valid_idx = ~(spread_lag.isna() | spread_diff.isna()) + spread_lag_clean = spread_lag[valid_idx] + spread_diff_clean = spread_diff[valid_idx] + + # Regression: spread_diff = alpha + beta * spread_lag + error + X = np.column_stack([np.ones(len(spread_lag_clean)), spread_lag_clean]) + try: + coeffs = np.linalg.lstsq(X, spread_diff_clean, rcond=None)[0] + beta = coeffs[1] + + # Half-life calculation + if beta < 0: + half_life = -np.log(2) / beta + else: + half_life = np.inf + except: + half_life = np.inf + + return half_life + + def calculate_spread_statistics(self, y1: pd.Series, y2: pd.Series, + hedge_ratio: float) -> Tuple[pd.Series, float, float]: + """ + Calculate spread and its statistical properties. + """ + spread = y1 - hedge_ratio * y2 + spread_mean = spread.mean() + spread_std = spread.std() + + return spread, spread_mean, spread_std + + def generate_signals_pair(self, data1: pd.DataFrame, data2: pd.DataFrame) -> pd.Series: + """ + Generate trading signals for a pair of assets. 
+ + Args: + data1: Price data for first asset + data2: Price data for second asset + + Returns: + Series with signals: 1 (long spread), -1 (short spread), 0 (no position) + """ + prices1 = data1['close'] + prices2 = data2['close'] + + # Ensure same index + common_index = prices1.index.intersection(prices2.index) + prices1 = prices1[common_index] + prices2 = prices2[common_index] + + signals = pd.Series(0, index=common_index) + + if len(prices1) < self.lookback_window: + return signals + + for i in range(self.lookback_window, len(prices1)): + # Use rolling window for cointegration analysis + y1_window = prices1.iloc[i-self.lookback_window:i] + y2_window = prices2.iloc[i-self.lookback_window:i] + + # Test cointegration + try: + test_stat, p_value, hedge_ratio = self.calculate_cointegration(y1_window, y2_window) + + # Only proceed if pairs are cointegrated (p < 0.05) + if p_value < 0.05: + # Calculate spread + spread, spread_mean, spread_std = self.calculate_spread_statistics( + y1_window, y2_window, hedge_ratio + ) + + # Check half-life + half_life = self.calculate_half_life(spread) + if not (self.min_half_life <= half_life <= self.max_half_life): + continue + + # Calculate current z-score + current_spread = prices1.iloc[i] - hedge_ratio * prices2.iloc[i] + z_score = (current_spread - spread_mean) / spread_std + + # Generate signals based on z-score + if abs(z_score) > self.entry_threshold and self.current_position == 0: + # Enter position + if z_score > 0: + signals.iloc[i] = -1 # Short spread (short asset1, long asset2) + self.current_position = -1 + else: + signals.iloc[i] = 1 # Long spread (long asset1, short asset2) + self.current_position = 1 + + elif abs(z_score) < self.exit_threshold and self.current_position != 0: + # Exit position + signals.iloc[i] = 0 + self.current_position = 0 + + elif abs(z_score) > self.stop_loss_threshold and self.current_position != 0: + # Stop loss + signals.iloc[i] = 0 + self.current_position = 0 + + else: + # Hold current position 
+ signals.iloc[i] = self.current_position + + except Exception as e: + # Skip if cointegration test fails + continue + + return signals + + def generate_signal(self, market_data: pd.DataFrame) -> str: + """ + Generate signal for single asset (not applicable for pairs trading). + This method is required by base class but pairs trading needs two assets. + """ + return 'HOLD' + + def find_cointegrated_pairs(self, price_data: Dict[str, pd.DataFrame], + min_correlation: float = 0.7) -> List[Tuple[str, str, float]]: + """ + Find cointegrated pairs from a universe of assets. + + Args: + price_data: Dictionary of asset name -> price DataFrame + min_correlation: Minimum correlation threshold + + Returns: + List of tuples (asset1, asset2, p_value) + """ + assets = list(price_data.keys()) + cointegrated_pairs = [] + + for i in range(len(assets)): + for j in range(i+1, len(assets)): + asset1, asset2 = assets[i], assets[j] + + # Get common time period + prices1 = price_data[asset1]['close'] + prices2 = price_data[asset2]['close'] + common_index = prices1.index.intersection(prices2.index) + + if len(common_index) < self.lookback_window: + continue + + p1 = prices1[common_index] + p2 = prices2[common_index] + + # Check correlation first + correlation = p1.corr(p2) + if abs(correlation) < min_correlation: + continue + + # Test cointegration + try: + test_stat, p_value, hedge_ratio = self.calculate_cointegration(p1, p2) + + if p_value < 0.05: # Cointegrated at 5% level + cointegrated_pairs.append((asset1, asset2, p_value)) + + except Exception: + continue + + # Sort by p-value (most cointegrated first) + cointegrated_pairs.sort(key=lambda x: x[2]) + return cointegrated_pairs + + +class StatisticalArbitrageAgent(BaseAgent): + """ + Statistical arbitrage strategy using multiple statistical techniques: + - Mean reversion + - Momentum + - Cross-sectional ranking + """ + + def __init__(self, config: dict = None): + super().__init__(config) + self.lookback_window = 
self.config.get("lookback_window", 20) + self.momentum_window = self.config.get("momentum_window", 10) + self.reversion_threshold = self.config.get("reversion_threshold", 1.5) + self.momentum_threshold = self.config.get("momentum_threshold", 0.02) + + def calculate_z_score(self, prices: pd.Series, window: int = None) -> pd.Series: + """Calculate rolling z-score""" + if window is None: + window = self.lookback_window + + rolling_mean = prices.rolling(window).mean() + rolling_std = prices.rolling(window).std() + z_score = (prices - rolling_mean) / rolling_std + + return z_score + + def calculate_momentum(self, prices: pd.Series, window: int = None) -> pd.Series: + """Calculate price momentum""" + if window is None: + window = self.momentum_window + + momentum = prices.pct_change(window) + return momentum + + def generate_signal(self, market_data: pd.DataFrame) -> str: + """ + Generate trading signal based on statistical arbitrage. + """ + prices = market_data['close'] + + if len(prices) < max(self.lookback_window, self.momentum_window): + return 'HOLD' + + # Calculate indicators + z_score = self.calculate_z_score(prices) + momentum = self.calculate_momentum(prices) + + current_z = z_score.iloc[-1] + current_momentum = momentum.iloc[-1] + + # Mean reversion signal + reversion_signal = 0 + if current_z > self.reversion_threshold: + reversion_signal = -1 # Expect reversion down + elif current_z < -self.reversion_threshold: + reversion_signal = 1 # Expect reversion up + + # Momentum signal + momentum_signal = 0 + if current_momentum > self.momentum_threshold: + momentum_signal = 1 # Positive momentum + elif current_momentum < -self.momentum_threshold: + momentum_signal = -1 # Negative momentum + + # Combine signals (momentum takes precedence for strong moves) + if abs(current_momentum) > 2 * self.momentum_threshold: + final_signal = momentum_signal + else: + final_signal = reversion_signal + + signal_map = {1: 'BUY', -1: 'SELL', 0: 'HOLD'} + return 
signal_map[final_signal] + + +# Example usage +if __name__ == "__main__": + # Generate sample correlated data for testing + np.random.seed(42) + dates = pd.date_range('2020-01-01', '2023-12-31', freq='D') + + # Create cointegrated pair + common_factor = np.cumsum(np.random.randn(len(dates)) * 0.01) + noise1 = np.random.randn(len(dates)) * 0.005 + noise2 = np.random.randn(len(dates)) * 0.005 + + prices1 = 100 * np.exp(common_factor + noise1) + prices2 = 95 * np.exp(0.95 * common_factor + noise2) # Cointegrated with ratio ~1.05 + + data1 = pd.DataFrame({'close': prices1}, index=dates) + data2 = pd.DataFrame({'close': prices2}, index=dates) + + # Test pairs trading + pairs_agent = PairsTradingAgent({ + 'lookback_window': 60, + 'entry_threshold': 2.0, + 'exit_threshold': 0.5 + }) + + signals = pairs_agent.generate_signals_pair(data1, data2) + print(f"Generated {(signals != 0).sum()} trading signals") + print(f"Signal distribution: {signals.value_counts()}") \ No newline at end of file diff --git a/agents/volatility-agent.py b/agents/volatility-agent.py new file mode 100644 index 0000000..c172efb --- /dev/null +++ b/agents/volatility-agent.py @@ -0,0 +1,457 @@ +""" +Volatility Trading Agent + +Strategies that trade on volatility patterns: +- Volatility breakouts +- Volatility mean reversion +- VIX-based strategies +- Volatility surface arbitrage +""" + +import pandas as pd +import numpy as np +from agents.base_agent import BaseAgent +from typing import Dict, List, Optional, Tuple +from scipy import stats +import warnings +warnings.filterwarnings('ignore') + + +class VolatilityBreakoutAgent(BaseAgent): + """ + Volatility breakout strategy that trades when volatility breaks + above/below historical ranges. 
+ """ + + def __init__(self, config: dict = None): + super().__init__(config) + self.vol_lookback = self.config.get("vol_lookback", 20) + self.breakout_threshold = self.config.get("breakout_threshold", 2.0) # Standard deviations + self.min_vol_change = self.config.get("min_vol_change", 0.5) # Minimum volatility change + self.holding_period = self.config.get("holding_period", 5) # Days to hold position + self.vol_estimation_method = self.config.get("vol_estimation_method", "close_to_close") + + def calculate_volatility(self, market_data: pd.DataFrame, method: str = "close_to_close") -> pd.Series: + """Calculate volatility using different methods""" + if method == "close_to_close": + returns = market_data['close'].pct_change() + volatility = returns.rolling(self.vol_lookback).std() * np.sqrt(252) + + elif method == "parkinson" and all(col in market_data.columns for col in ['high', 'low']): + # Parkinson volatility estimator + high = market_data['high'] + low = market_data['low'] + hl_ratio = np.log(high / low) + parkinson_var = (hl_ratio ** 2) / (4 * np.log(2)) + volatility = np.sqrt(parkinson_var.rolling(self.vol_lookback).mean() * 252) + + elif method == "garman_klass" and all(col in market_data.columns for col in ['high', 'low', 'open', 'close']): + # Garman-Klass volatility estimator + high = market_data['high'] + low = market_data['low'] + open_price = market_data['open'] + close = market_data['close'] + + gk_var = (0.5 * (np.log(high / low) ** 2) - + (2 * np.log(2) - 1) * (np.log(close / open_price) ** 2)) + volatility = np.sqrt(gk_var.rolling(self.vol_lookback).mean() * 252) + + else: + # Default to close-to-close + returns = market_data['close'].pct_change() + volatility = returns.rolling(self.vol_lookback).std() * np.sqrt(252) + + return volatility + + def detect_volatility_breakout(self, volatility: pd.Series) -> pd.Series: + """Detect volatility breakouts""" + vol_mean = volatility.rolling(self.vol_lookback * 2).mean() + vol_std = 
volatility.rolling(self.vol_lookback * 2).std() + + # Z-score of current volatility + vol_zscore = (volatility - vol_mean) / vol_std + + # Breakout signals + breakout_signals = pd.Series(0, index=volatility.index) + breakout_signals[vol_zscore > self.breakout_threshold] = 1 # High vol breakout + breakout_signals[vol_zscore < -self.breakout_threshold] = -1 # Low vol breakout + + return breakout_signals + + def generate_signal(self, market_data: pd.DataFrame) -> str: + """Generate volatility breakout signal""" + if len(market_data) < self.vol_lookback * 3: + return 'HOLD' + + # Calculate volatility + volatility = self.calculate_volatility(market_data, self.vol_estimation_method) + + # Detect breakouts + breakout_signals = self.detect_volatility_breakout(volatility) + + current_signal = breakout_signals.iloc[-1] + current_vol = volatility.iloc[-1] + prev_vol = volatility.iloc[-2] if len(volatility) > 1 else current_vol + + # Check for minimum volatility change + vol_change = abs(current_vol - prev_vol) / prev_vol if prev_vol > 0 else 0 + + if vol_change < self.min_vol_change: + return 'HOLD' + + # Generate trading signal + if current_signal == 1: + return 'BUY' # High volatility breakout - expect continuation + elif current_signal == -1: + return 'SELL' # Low volatility breakout - expect mean reversion + else: + return 'HOLD' + + +class VolatilityMeanReversionAgent(BaseAgent): + """ + Volatility mean reversion strategy that trades when volatility + is expected to revert to its long-term mean. 
    def __init__(self, config: dict = None):
        super().__init__(config)
        # Short/long estimation windows for the volatility ratio.
        self.short_vol_window = self.config.get("short_vol_window", 10)
        self.long_vol_window = self.config.get("long_vol_window", 50)
        # Ratio of short/long vol beyond which reversion is expected.
        self.reversion_threshold = self.config.get("reversion_threshold", 1.5)
        # Percentile gates confirming that vol is historically extreme.
        self.vol_percentile_high = self.config.get("vol_percentile_high", 80)
        self.vol_percentile_low = self.config.get("vol_percentile_low", 20)

    # NOTE: annotation fixed — this method returns FOUR series (and
    # generate_signal unpacks four), not the three previously declared.
    def calculate_volatility_regime(self, market_data: pd.DataFrame) -> Tuple[pd.Series, pd.Series, pd.Series, pd.Series]:
        """Identify volatility regime and mean reversion opportunities.

        Returns:
            Tuple of (short_vol, long_vol, vol_ratio, vol_percentiles):
            annualized short/long rolling vol, their ratio, and the rolling
            percentile rank (0-100) of short vol over 2x the long window.
        """
        returns = market_data['close'].pct_change()

        # Short and long-term volatility (annualized via sqrt(252))
        short_vol = returns.rolling(self.short_vol_window).std() * np.sqrt(252)
        long_vol = returns.rolling(self.long_vol_window).std() * np.sqrt(252)

        # Volatility ratio: >1 means short-run vol elevated vs long-run
        vol_ratio = short_vol / long_vol

        # Historical percentiles (rank within the trailing window, 0-100)
        vol_percentiles = short_vol.rolling(self.long_vol_window * 2).rank(pct=True) * 100

        return short_vol, long_vol, vol_ratio, vol_percentiles

    def generate_signal(self, market_data: pd.DataFrame) -> str:
        """Generate volatility mean reversion signal.

        SELL when short-run vol is both high relative to long-run vol and
        historically extreme; BUY in the mirrored low-vol case; else HOLD.
        """
        if len(market_data) < self.long_vol_window * 2:
            return 'HOLD'

        short_vol, long_vol, vol_ratio, vol_percentiles = self.calculate_volatility_regime(market_data)

        current_vol_ratio = vol_ratio.iloc[-1]
        current_percentile = vol_percentiles.iloc[-1]

        # Mean reversion signals: both the ratio and the percentile gate
        # must agree before a position is suggested.
        if (current_vol_ratio > self.reversion_threshold and
            current_percentile > self.vol_percentile_high):
            return 'SELL'  # High volatility, expect reversion down
        elif (current_vol_ratio < (1 / self.reversion_threshold) and
              current_percentile < self.vol_percentile_low):
            return 'BUY'  # Low volatility, expect reversion up
        else:
            return 'HOLD'
+ """ + + def __init__(self, config: dict = None): + super().__init__(config) + self.vix_window = self.config.get("vix_window", 20) + self.vix_high_threshold = self.config.get("vix_high_threshold", 30) # High fear + self.vix_low_threshold = self.config.get("vix_low_threshold", 15) # Low fear/complacency + self.vix_spike_threshold = self.config.get("vix_spike_threshold", 1.5) # VIX spike multiplier + + def calculate_synthetic_vix(self, market_data: pd.DataFrame) -> pd.Series: + """Calculate synthetic VIX from price data""" + returns = market_data['close'].pct_change() + + # Rolling volatility (annualized) + rolling_vol = returns.rolling(self.vix_window).std() * np.sqrt(252) * 100 + + # Apply VIX-like scaling (VIX tends to be higher than realized vol) + synthetic_vix = rolling_vol * 1.2 # Scaling factor + + return synthetic_vix + + def detect_vix_spikes(self, vix: pd.Series) -> pd.Series: + """Detect VIX spikes that often mark market bottoms""" + vix_ma = vix.rolling(self.vix_window).mean() + vix_spikes = vix > (vix_ma * self.vix_spike_threshold) + + return vix_spikes + + def generate_signal(self, market_data: pd.DataFrame) -> str: + """Generate VIX-based trading signal""" + if len(market_data) < self.vix_window * 2: + return 'HOLD' + + synthetic_vix = self.calculate_synthetic_vix(market_data) + vix_spikes = self.detect_vix_spikes(synthetic_vix) + + current_vix = synthetic_vix.iloc[-1] + current_spike = vix_spikes.iloc[-1] + + # VIX-based signals + if current_spike or current_vix > self.vix_high_threshold: + return 'BUY' # High fear - contrarian buy + elif current_vix < self.vix_low_threshold: + return 'SELL' # Low fear/complacency - expect volatility increase + else: + return 'HOLD' + + +class VolatilitySurfaceAgent(BaseAgent): + """ + Volatility surface arbitrage strategy that looks for + inconsistencies in implied vs realized volatility. 
+ """ + + def __init__(self, config: dict = None): + super().__init__(config) + self.short_term_window = self.config.get("short_term_window", 5) + self.medium_term_window = self.config.get("medium_term_window", 20) + self.long_term_window = self.config.get("long_term_window", 60) + self.vol_spread_threshold = self.config.get("vol_spread_threshold", 0.05) + + def calculate_term_structure(self, market_data: pd.DataFrame) -> Dict[str, pd.Series]: + """Calculate volatility term structure""" + returns = market_data['close'].pct_change() + + vol_structure = { + 'short_term': returns.rolling(self.short_term_window).std() * np.sqrt(252), + 'medium_term': returns.rolling(self.medium_term_window).std() * np.sqrt(252), + 'long_term': returns.rolling(self.long_term_window).std() * np.sqrt(252) + } + + return vol_structure + + def detect_term_structure_anomalies(self, vol_structure: Dict[str, pd.Series]) -> pd.Series: + """Detect anomalies in volatility term structure""" + short_vol = vol_structure['short_term'] + medium_vol = vol_structure['medium_term'] + long_vol = vol_structure['long_term'] + + # Calculate spreads + short_medium_spread = short_vol - medium_vol + medium_long_spread = medium_vol - long_vol + + # Anomaly detection + anomaly_signals = pd.Series(0, index=short_vol.index) + + # Inverted term structure (short > long by significant margin) + inverted_condition = (short_medium_spread > self.vol_spread_threshold) & \ + (medium_long_spread > self.vol_spread_threshold) + anomaly_signals[inverted_condition] = -1 + + # Extremely flat term structure + flat_condition = (abs(short_medium_spread) < self.vol_spread_threshold / 2) & \ + (abs(medium_long_spread) < self.vol_spread_threshold / 2) + anomaly_signals[flat_condition] = 1 + + return anomaly_signals + + def generate_signal(self, market_data: pd.DataFrame) -> str: + """Generate volatility surface arbitrage signal""" + if len(market_data) < self.long_term_window * 2: + return 'HOLD' + + vol_structure = 
self.calculate_term_structure(market_data) + anomaly_signals = self.detect_term_structure_anomalies(vol_structure) + + current_signal = anomaly_signals.iloc[-1] + + if current_signal == 1: + return 'BUY' # Flat term structure - expect volatility increase + elif current_signal == -1: + return 'SELL' # Inverted term structure - expect normalization + else: + return 'HOLD' + + +class AdaptiveVolatilityAgent(BaseAgent): + """ + Adaptive volatility strategy that adjusts to changing market regimes + using multiple volatility measures and regime detection. + """ + + def __init__(self, config: dict = None): + super().__init__(config) + self.regime_window = self.config.get("regime_window", 60) + self.vol_threshold_low = self.config.get("vol_threshold_low", 0.15) + self.vol_threshold_high = self.config.get("vol_threshold_high", 0.35) + self.regime_change_threshold = self.config.get("regime_change_threshold", 0.1) + + def detect_volatility_regime(self, market_data: pd.DataFrame) -> Tuple[pd.Series, pd.Series]: + """Detect current volatility regime""" + returns = market_data['close'].pct_change() + + # Rolling volatility + rolling_vol = returns.rolling(self.regime_window).std() * np.sqrt(252) + + # Regime classification + regime = pd.Series(0, index=returns.index) # 0: Normal, 1: High Vol, -1: Low Vol + + regime[rolling_vol > self.vol_threshold_high] = 1 # High volatility regime + regime[rolling_vol < self.vol_threshold_low] = -1 # Low volatility regime + + # Regime changes + regime_changes = regime.diff().abs() > 0 + + return regime, regime_changes + + def calculate_regime_persistence(self, regime: pd.Series) -> pd.Series: + """Calculate how long current regime has persisted""" + regime_persistence = pd.Series(0, index=regime.index) + + current_regime = None + persistence_count = 0 + + for i, reg in enumerate(regime): + if reg != current_regime: + current_regime = reg + persistence_count = 1 + else: + persistence_count += 1 + + regime_persistence.iloc[i] = persistence_count + 
+ return regime_persistence + + def generate_signal(self, market_data: pd.DataFrame) -> str: + """Generate adaptive volatility signal""" + if len(market_data) < self.regime_window * 2: + return 'HOLD' + + regime, regime_changes = self.detect_volatility_regime(market_data) + regime_persistence = self.calculate_regime_persistence(regime) + + current_regime = regime.iloc[-1] + current_persistence = regime_persistence.iloc[-1] + recent_change = regime_changes.iloc[-5:].any() # Any change in last 5 periods + + # Adaptive strategy based on regime + if current_regime == 1: # High volatility regime + if current_persistence > 10: # Persistent high vol + return 'SELL' # Expect mean reversion + else: + return 'HOLD' # Wait for regime to establish + + elif current_regime == -1: # Low volatility regime + if current_persistence > 20: # Very persistent low vol + return 'BUY' # Expect volatility expansion + else: + return 'HOLD' + + else: # Normal regime + if recent_change: + return 'HOLD' # Wait for regime to stabilize + else: + return 'HOLD' # No clear signal in normal regime + + +# Example usage and testing +if __name__ == "__main__": + # Generate sample data with volatility clustering + np.random.seed(42) + dates = pd.date_range('2020-01-01', '2023-12-31', freq='D') + + # Create GARCH-like volatility clustering + n = len(dates) + returns = np.zeros(n) + volatility = np.zeros(n) + volatility[0] = 0.02 + + # GARCH(1,1) parameters + omega = 0.00001 + alpha = 0.05 + beta = 0.9 + + for i in range(1, n): + # GARCH volatility update + volatility[i] = np.sqrt(omega + alpha * returns[i-1]**2 + beta * volatility[i-1]**2) + + # Generate return with current volatility + returns[i] = volatility[i] * np.random.randn() + + # Convert to prices + log_prices = np.cumsum(returns) + prices = 100 * np.exp(log_prices) + + # Create OHLC data (simplified) + high_prices = prices * (1 + np.abs(np.random.randn(n)) * 0.01) + low_prices = prices * (1 - np.abs(np.random.randn(n)) * 0.01) + open_prices = 
prices * (1 + np.random.randn(n) * 0.005) + + sample_data = pd.DataFrame({ + 'open': open_prices, + 'high': high_prices, + 'low': low_prices, + 'close': prices, + 'volume': np.random.randint(1000, 10000, n) + }, index=dates) + + # Test volatility agents + print("Testing Volatility Trading Agents:") + print("=" * 50) + + # Volatility Breakout Agent + breakout_agent = VolatilityBreakoutAgent({ + 'vol_lookback': 20, + 'breakout_threshold': 2.0, + 'vol_estimation_method': 'garman_klass' + }) + + breakout_signals = [] + for i in range(60, len(sample_data)): # Start after warmup period + signal = breakout_agent.generate_signal(sample_data.iloc[:i+1]) + breakout_signals.append(signal) + + print(f"Volatility Breakout Agent:") + print(f" Buy signals: {breakout_signals.count('BUY')}") + print(f" Sell signals: {breakout_signals.count('SELL')}") + print(f" Hold signals: {breakout_signals.count('HOLD')}") + + # VIX-based Agent + vix_agent = VIXBasedAgent({ + 'vix_window': 20, + 'vix_high_threshold': 25, + 'vix_low_threshold': 12 + }) + + vix_signals = [] + for i in range(40, len(sample_data)): + signal = vix_agent.generate_signal(sample_data.iloc[:i+1]) + vix_signals.append(signal) + + print(f"\nVIX-based Agent:") + print(f" Buy signals: {vix_signals.count('BUY')}") + print(f" Sell signals: {vix_signals.count('SELL')}") + print(f" Hold signals: {vix_signals.count('HOLD')}") + + # Adaptive Volatility Agent + adaptive_agent = AdaptiveVolatilityAgent({ + 'regime_window': 30, + 'vol_threshold_low': 0.15, + 'vol_threshold_high': 0.30 + }) + + adaptive_signals = [] + for i in range(120, len(sample_data)): + signal = adaptive_agent.generate_signal(sample_data.iloc[:i+1]) + adaptive_signals.append(signal) + + print(f"\nAdaptive Volatility Agent:") + print(f" Buy signals: {adaptive_signals.count('BUY')}") + print(f" Sell signals: {adaptive_signals.count('SELL')}") + print(f" Hold signals: {adaptive_signals.count('HOLD')}") \ No newline at end of file diff --git 
a/examples/complete_trading_system_example.py b/examples/complete_trading_system_example.py new file mode 100644 index 0000000..7e3f0f4 --- /dev/null +++ b/examples/complete_trading_system_example.py @@ -0,0 +1,400 @@ +""" +Complete Trading System Example + +This example demonstrates how to use all components of the trading system together: +1. Data loading and preprocessing +2. Strategy creation and optimization +3. Backtesting with advanced features +4. Portfolio management +5. Risk analysis +6. Comprehensive visualization + +This serves as a complete end-to-end example of the trading system capabilities. +""" + +import sys +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import pandas as pd +import numpy as np +import warnings +from datetime import datetime, timedelta + +# Import our custom modules +from utils.data_loader import DataLoader, DataConfig +from agents.momentum_agent import MomentumAgent +from agents.mean_reversion_agent import MeanReversionAgent +from agents.volatility_agent import VolatilityBreakoutAgent +from research.backtest_engine import EnhancedBacktester, BacktestConfig +from research.portfolio_manager import PortfolioManager, PortfolioConfig +from research.strategy_optimizer import StrategyOptimizer, OptimizationConfig, ParameterSpace +from utils.risk_analytics import RiskAnalyzer, RiskConfig +from utils.visualization import TradingVisualizer + +warnings.filterwarnings('ignore') + + +def main(): + """Main function demonstrating the complete trading system""" + + print("=" * 80) + print("COMPLETE TRADING SYSTEM DEMONSTRATION") + print("=" * 80) + + # ============================================================================ + # STEP 1: DATA LOADING AND PREPROCESSING + # ============================================================================ + print("\n1. 
LOADING AND PREPROCESSING DATA") + print("-" * 50) + + # Configure data loader + data_config = DataConfig( + start_date='2020-01-01', + end_date='2023-12-31', + add_technical_indicators=True, + add_market_features=True, + cache_data=True + ) + + # For this example, we'll create synthetic data since we may not have API keys + print("Creating synthetic market data for demonstration...") + + dates = pd.date_range('2020-01-01', '2023-12-31', freq='D') + n_days = len(dates) + + # Create realistic market data with trends and volatility clustering + np.random.seed(42) + + # Base return process with some autocorrelation + base_returns = np.random.randn(n_days) * 0.015 + for i in range(1, n_days): + base_returns[i] += 0.05 * base_returns[i-1] # Add some momentum + + # Add trend component + trend = np.linspace(0, 0.3, n_days) # 30% upward trend over period + + # Add volatility clustering (GARCH-like) + volatility = np.zeros(n_days) + volatility[0] = 0.02 + for i in range(1, n_days): + volatility[i] = 0.00001 + 0.05 * base_returns[i-1]**2 + 0.9 * volatility[i-1] + base_returns[i] *= np.sqrt(volatility[i]) + + # Generate prices + log_prices = np.cumsum(base_returns) + trend + prices = 100 * np.exp(log_prices) + + # Generate OHLCV data + high_prices = prices * (1 + np.abs(np.random.randn(n_days) * 0.01)) + low_prices = prices * (1 - np.abs(np.random.randn(n_days) * 0.01)) + open_prices = np.roll(prices, 1) + open_prices[0] = 100 + volumes = np.random.randint(50000, 200000, n_days) + + # Create market data DataFrame + market_data = pd.DataFrame({ + 'open': open_prices, + 'high': high_prices, + 'low': low_prices, + 'close': prices, + 'volume': volumes + }, index=dates) + + # Add technical indicators manually (simulating data loader output) + market_data['sma_20'] = market_data['close'].rolling(20).mean() + market_data['sma_50'] = market_data['close'].rolling(50).mean() + market_data['returns'] = market_data['close'].pct_change() + market_data['volatility_20'] = 
market_data['returns'].rolling(20).std() * np.sqrt(252) + + print(f"Created market data: {len(market_data)} days") + print(f"Price range: ${market_data['close'].min():.2f} - ${market_data['close'].max():.2f}") + + # ============================================================================ + # STEP 2: STRATEGY CREATION AND TESTING + # ============================================================================ + print("\n2. CREATING AND TESTING TRADING STRATEGIES") + print("-" * 50) + + # Create different strategies + strategies = { + 'Momentum': MomentumAgent({ + 'fast_period': 10, + 'slow_period': 30, + 'momentum_threshold': 0.02 + }), + 'Mean Reversion': MeanReversionAgent({ + 'lookback': 20, + 'z_threshold': 1.5 + }), + 'Volatility Breakout': VolatilityBreakoutAgent({ + 'vol_lookback': 20, + 'breakout_threshold': 2.0 + }) + } + + # Generate signals for each strategy + strategy_signals = {} + + for name, strategy in strategies.items(): + print(f"Generating signals for {name} strategy...") + + if name == 'Momentum': + signals_data = strategy.generate_detailed_signals(market_data) + if signals_data is not None and 'signal' in signals_data.columns: + strategy_signals[name] = signals_data['signal'] + else: + # Fallback signal generation + strategy_signals[name] = pd.Series(0, index=market_data.index) + else: + # Generate signals day by day for other strategies + signals = [] + for i in range(len(market_data)): + if i < 30: # Need minimum data + signals.append(0) + else: + data_slice = market_data.iloc[:i+1] + signal = strategy.generate_signal(data_slice) + signal_map = {'BUY': 1, 'SELL': -1, 'HOLD': 0} + signals.append(signal_map.get(signal, 0)) + + strategy_signals[name] = pd.Series(signals, index=market_data.index) + + signal_counts = strategy_signals[name].value_counts() + print(f" {name}: {signal_counts.to_dict()}") + + # ============================================================================ + # STEP 3: BACKTESTING WITH ADVANCED FEATURES + # 
============================================================================ + print("\n3. BACKTESTING STRATEGIES") + print("-" * 50) + + # Configure backtesting + backtest_config = BacktestConfig( + initial_capital=100000, + commission=0.001, + slippage=0.0005, + position_sizing='percent_risk', + risk_per_trade=0.02 + ) + + # Backtest each strategy + backtest_results = {} + + for name, signals in strategy_signals.items(): + print(f"Backtesting {name} strategy...") + + backtester = EnhancedBacktester(market_data, backtest_config) + results = backtester.backtest_strategy(signals) + backtest_results[name] = results + + metrics = results['performance_metrics'] + print(f" Total Return: {metrics['total_return']:.2%}") + print(f" Sharpe Ratio: {metrics['sharpe_ratio']:.2f}") + print(f" Max Drawdown: {metrics['max_drawdown']:.2%}") + print(f" Number of Trades: {metrics['num_trades']}") + + # ============================================================================ + # STEP 4: PORTFOLIO MANAGEMENT + # ============================================================================ + print("\n4. 
PORTFOLIO MANAGEMENT") + print("-" * 50) + + # Create portfolio manager + portfolio_config = PortfolioConfig( + initial_capital=300000, + rebalance_frequency='monthly', + risk_budget_method='equal_risk', + max_strategy_weight=0.6 + ) + + portfolio_manager = PortfolioManager(portfolio_config) + + # Add strategy returns to portfolio + for name, results in backtest_results.items(): + returns = results['results_df']['returns'] + portfolio_manager.add_strategy(name, returns, name.lower().replace(' ', '_')) + + # Backtest portfolio + print("Running portfolio backtest...") + portfolio_results = portfolio_manager.backtest_portfolio() + + portfolio_metrics = portfolio_results['performance_metrics'] + print(f"Portfolio Results:") + print(f" Total Return: {portfolio_metrics['total_return']:.2%}") + print(f" Sharpe Ratio: {portfolio_metrics['sharpe_ratio']:.2f}") + print(f" Max Drawdown: {portfolio_metrics['max_drawdown']:.2%}") + + # ============================================================================ + # STEP 5: STRATEGY OPTIMIZATION + # ============================================================================ + print("\n5. 
STRATEGY OPTIMIZATION") + print("-" * 50) + + # Optimize the momentum strategy as an example + def create_momentum_strategy(params): + return MomentumAgent(params) + + def run_backtest(data, signals): + backtester = EnhancedBacktester(data, backtest_config) + return backtester.backtest_strategy(signals) + + # Define parameter space + param_space = ParameterSpace() + param_space.add_parameter('fast_period', 'integer', min=5, max=15) + param_space.add_parameter('slow_period', 'integer', min=20, max=40) + param_space.add_parameter('momentum_threshold', 'continuous', min=0.01, max=0.04) + + # Add constraint + param_space.add_constraint(lambda p: p['fast_period'] < p['slow_period']) + + # Configure optimization + opt_config = OptimizationConfig( + method='grid_search', + objective_metric='sharpe_ratio', + max_iterations=20 # Keep small for demo + ) + + # Run optimization + print("Running strategy optimization (limited iterations for demo)...") + optimizer = StrategyOptimizer(opt_config) + + try: + opt_results = optimizer.optimize_strategy( + create_momentum_strategy, run_backtest, market_data, param_space + ) + + print(f"Optimization Results:") + print(f" Best Parameters: {opt_results['best_parameters']}") + print(f" Best Score: {opt_results['best_score']:.3f}") + except Exception as e: + print(f"Optimization failed: {e}") + opt_results = None + + # ============================================================================ + # STEP 6: RISK ANALYSIS + # ============================================================================ + print("\n6. 
RISK ANALYSIS") + print("-" * 50) + + # Perform risk analysis on the best performing strategy + best_strategy_name = max(backtest_results.keys(), + key=lambda k: backtest_results[k]['performance_metrics']['sharpe_ratio']) + best_results = backtest_results[best_strategy_name] + best_returns = best_results['results_df']['returns'] + + print(f"Analyzing risk for best strategy: {best_strategy_name}") + + # Configure risk analysis + risk_config = RiskConfig( + var_confidence_levels=[0.01, 0.05, 0.10], + var_methods=['historical', 'parametric'] + ) + + # Run risk analysis + risk_analyzer = RiskAnalyzer(risk_config) + risk_results = risk_analyzer.comprehensive_risk_analysis( + best_returns, + portfolio_value=backtest_config.initial_capital + ) + + # Print key risk metrics + basic_metrics = risk_results['basic_metrics'] + var_metrics = risk_results['var_metrics'] + + print(f"Risk Analysis Results:") + print(f" Volatility: {basic_metrics['volatility']:.2%}") + print(f" Skewness: {basic_metrics['skewness']:.2f}") + print(f" Kurtosis: {basic_metrics['kurtosis']:.2f}") + print(f" VaR (5%): {var_metrics['5%']['var_historical']:.2%}") + print(f" CVaR (5%): {var_metrics['5%']['cvar_historical']:.2%}") + + # ============================================================================ + # STEP 7: VISUALIZATION + # ============================================================================ + print("\n7. 
CREATING VISUALIZATIONS") + print("-" * 50) + + # Create visualizer + visualizer = TradingVisualizer() + + # Create comprehensive dashboard for best strategy + print("Creating performance dashboard...") + + try: + # Performance dashboard + dashboard_fig = visualizer.plot_performance_dashboard(best_results) + + # Strategy comparison + comparison_fig = visualizer.plot_strategy_comparison(backtest_results) + + # Interactive dashboard + interactive_fig = visualizer.create_interactive_dashboard( + best_results, best_strategy_name + ) + + print("Visualizations created successfully!") + print("Note: In a Jupyter environment, these would display automatically.") + print("To view in a script, add .show() to each figure.") + + except Exception as e: + print(f"Visualization creation failed: {e}") + print("This might be due to missing Plotly or display environment issues.") + + # ============================================================================ + # STEP 8: SUMMARY AND CONCLUSIONS + # ============================================================================ + print("\n8. 
SUMMARY AND CONCLUSIONS") + print("-" * 50) + + print("Trading System Analysis Complete!") + print("\nKey Results:") + + # Best individual strategy + best_individual = max(backtest_results.items(), + key=lambda x: x[1]['performance_metrics']['sharpe_ratio']) + print(f" Best Individual Strategy: {best_individual[0]}") + print(f" Return: {best_individual[1]['performance_metrics']['total_return']:.2%}") + print(f" Sharpe: {best_individual[1]['performance_metrics']['sharpe_ratio']:.2f}") + + # Portfolio performance + print(f" Multi-Strategy Portfolio:") + print(f" Return: {portfolio_metrics['total_return']:.2%}") + print(f" Sharpe: {portfolio_metrics['sharpe_ratio']:.2f}") + print(f" Max Drawdown: {portfolio_metrics['max_drawdown']:.2%}") + + # Risk assessment + print(f" Risk Assessment:") + print(f" Portfolio Volatility: {basic_metrics['volatility']:.2%}") + print(f" Tail Risk (VaR 5%): {var_metrics['5%']['var_historical']:.2%}") + + print("\nSystem Capabilities Demonstrated:") + print(" โœ“ Data loading and preprocessing") + print(" โœ“ Multiple trading strategies") + print(" โœ“ Advanced backtesting with transaction costs") + print(" โœ“ Portfolio management and optimization") + print(" โœ“ Strategy parameter optimization") + print(" โœ“ Comprehensive risk analysis") + print(" โœ“ Professional visualization tools") + + print("\n" + "=" * 80) + print("DEMONSTRATION COMPLETE") + print("=" * 80) + + return { + 'market_data': market_data, + 'strategy_signals': strategy_signals, + 'backtest_results': backtest_results, + 'portfolio_results': portfolio_results, + 'optimization_results': opt_results, + 'risk_analysis': risk_results + } + + +if __name__ == "__main__": + # Run the complete demonstration + results = main() + + # Additional analysis could be performed here + print("\nAll results stored in 'results' dictionary for further analysis.") + print("Available keys:", list(results.keys())) \ No newline at end of file diff --git a/research/backtest-engine.py 
# %% [markdown]
"""
# Enhanced Vectorized Backtesting Engine

Comprehensive backtesting system with portfolio management, transaction costs,
slippage, risk management, and detailed performance analytics.
"""

# %% [code]
import pandas as pd
import numpy as np
from typing import Dict, List, Optional, Union, Tuple
from dataclasses import dataclass
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# NOTE(review): matplotlib is imported lazily inside the plotting helpers so
# the analytical core of this module stays usable in headless environments.

# %% [code]
@dataclass
class BacktestConfig:
    """Configuration for backtesting parameters."""
    initial_capital: float = 100000.0
    commission: float = 0.001        # 0.1% commission per trade
    slippage: float = 0.0005         # 0.05% slippage per trade
    max_leverage: float = 1.0
    position_sizing: str = 'fixed'   # 'fixed', 'percent_risk', 'kelly'
    risk_per_trade: float = 0.02     # 2% of portfolio risked per trade
    max_positions: int = 10
    margin_requirement: float = 0.1  # 10% margin for leveraged positions

# %% [code]
class EnhancedBacktester:
    """
    Enhanced backtesting engine with comprehensive features:
    - transaction costs and slippage
    - configurable position sizing strategies
    - trade-level bookkeeping
    - detailed performance metrics

    ``data`` must contain a ``'close'`` column indexed by timestamp
    (a ``'volume'`` column is carried along but not used here).
    """

    def __init__(self, data: pd.DataFrame, config: BacktestConfig = None):
        self.data = data.copy()
        self.config = config or BacktestConfig()
        self.reset()

    def reset(self):
        """Reset all mutable backtester state to a fresh run."""
        self.portfolio_value = self.config.initial_capital
        self.cash = self.config.initial_capital
        self.positions = pd.Series(0.0, index=self.data.index)
        self.trades = []
        self.portfolio_history = []
        self.returns = self.data['close'].pct_change().fillna(0)

    def calculate_position_size(self, price: float, signal_strength: float = 1.0,
                                volatility: float = None) -> float:
        """
        Return the position size in UNITS (shares/contracts) for a fill at
        ``price``.

        Bug fix: the 'fixed' and 'kelly' modes previously returned a DOLLAR
        amount that the trading loop multiplied by price again, so a
        "10% of capital" trade required ~price x capital in cash and could
        never pass the affordability check. All modes now return units.
        """
        if self.config.position_sizing == 'fixed':
            # 10% of initial capital, expressed in units at the current price.
            return (self.config.initial_capital * 0.1) / price
        elif self.config.position_sizing == 'percent_risk':
            if volatility is None:
                volatility = self.returns.rolling(20).std().iloc[-1]
            risk_amount = self.portfolio_value * self.config.risk_per_trade
            return risk_amount / (volatility * price)
        elif self.config.position_sizing == 'kelly':
            # Simplified Kelly criterion; both inputs are placeholders that
            # should be estimated from realized trade history.
            win_rate = 0.55
            avg_win_loss_ratio = 1.2
            kelly_fraction = win_rate - (1 - win_rate) / avg_win_loss_ratio
            dollar_size = self.portfolio_value * min(kelly_fraction * signal_strength, 0.25)
            return dollar_size / price
        return (self.config.initial_capital * 0.1) / price

    def apply_transaction_costs(self, trade_value: float) -> float:
        """Return total friction (commission + slippage) for a trade value."""
        commission_cost = abs(trade_value) * self.config.commission
        slippage_cost = abs(trade_value) * self.config.slippage
        return commission_cost + slippage_cost

    def backtest_strategy(self, signals: pd.Series, signal_strength: pd.Series = None) -> Dict:
        """
        Run a backtest over ``signals``.

        Args:
            signals: per-bar direction (-1, 0, 1), indexed like ``self.data``.
            signal_strength: optional per-bar conviction in [0, 1].

        Returns:
            Dict of performance metrics plus the per-bar ``results_df``.
        """
        if signal_strength is None:
            signal_strength = pd.Series(1.0, index=signals.index)

        portfolio_values = []
        cash_values = []
        position_values = []
        current_position = 0.0  # signed size in units
        entry_price = 0.0       # fill price of the open position (for realized pnl)

        for i, (timestamp, signal) in enumerate(signals.items()):
            if i == 0:
                portfolio_values.append(self.portfolio_value)
                cash_values.append(self.cash)
                position_values.append(0.0)
                continue

            price = self.data.loc[timestamp, 'close']
            prev_price = self.data.iloc[i - 1]['close']

            # Mark the open position to market.
            if current_position != 0:
                self.portfolio_value += current_position * (price - prev_price)

            # Bug fix: the signal (+/-1) used to be compared against the
            # position SIZE, so any non-unit position "differed" from its own
            # signal and the strategy closed/reopened every bar. Compare the
            # trade DIRECTION instead.
            current_direction = int(np.sign(current_position))
            if signal != 0 and signal != current_direction:
                # Close the existing position first.
                if current_position != 0:
                    trade_value = current_position * price
                    transaction_cost = self.apply_transaction_costs(trade_value)
                    # NOTE(review): short-side cash accounting mirrors the
                    # original long-only arithmetic — TODO confirm if shorts
                    # are ever traded through this engine.
                    self.cash += trade_value - transaction_cost
                    self.trades.append({
                        'timestamp': timestamp,
                        'type': 'CLOSE',
                        'size': -current_position,
                        'price': price,
                        'value': trade_value,
                        'cost': transaction_cost,
                        # Bug fix: realized pnl was never recorded, so the
                        # win-rate statistic always came out as zero.
                        'pnl': current_position * (price - entry_price),
                    })
                    current_position = 0.0

                # Open the new position in the signal direction.
                strength = signal_strength.loc[timestamp]
                position_size = signal * self.calculate_position_size(price, strength)

                required_cash = abs(position_size * price)
                if self.config.max_leverage > 1:
                    required_cash *= self.config.margin_requirement

                if required_cash <= self.cash:
                    trade_value = position_size * price
                    transaction_cost = self.apply_transaction_costs(trade_value)
                    self.cash -= required_cash + transaction_cost
                    current_position = position_size
                    entry_price = price
                    self.trades.append({
                        'timestamp': timestamp,
                        'type': 'OPEN',
                        'size': position_size,
                        'price': price,
                        'value': trade_value,
                        'cost': transaction_cost,
                    })

            # Per-bar portfolio tracking.
            position_value = current_position * price if current_position != 0 else 0.0
            self.portfolio_value = self.cash + position_value

            portfolio_values.append(self.portfolio_value)
            cash_values.append(self.cash)
            position_values.append(position_value)
            self.positions.iloc[i] = current_position

        # Bug fix: the returns series used to be built on a RangeIndex and was
        # then reindexed onto the datetime index by the DataFrame constructor,
        # which turned every value into NaN (wiping out volatility/Sharpe).
        equity = pd.Series(portfolio_values, index=self.data.index)
        results = pd.DataFrame({
            'portfolio_value': equity,
            'cash': pd.Series(cash_values, index=self.data.index),
            'position_value': pd.Series(position_values, index=self.data.index),
            'positions': self.positions,
            'returns': equity.pct_change().fillna(0),
            'price': self.data['close'],
        })

        return self._calculate_performance_metrics(results)

    def _calculate_performance_metrics(self, results: pd.DataFrame) -> Dict:
        """Compute summary statistics from the per-bar results frame."""
        returns = results['returns']
        portfolio_values = results['portfolio_value']

        # Basic metrics (252 trading days per year assumed throughout).
        total_return = (portfolio_values.iloc[-1] / self.config.initial_capital) - 1
        annualized_return = (1 + total_return) ** (252 / len(returns)) - 1

        # Risk metrics; a 3% risk-free rate is assumed module-wide.
        volatility = returns.std() * np.sqrt(252)
        sharpe_ratio = (annualized_return - 0.03) / volatility if volatility > 0 else 0

        # Drawdown analysis on the running equity peak.
        rolling_max = portfolio_values.expanding().max()
        drawdown = (portfolio_values - rolling_max) / rolling_max
        max_drawdown = drawdown.min()

        # Trade analysis. Bug fix: win rate previously looked up a 'pnl' key
        # that was never written, so it was always zero. It is now the share
        # of CLOSE trades with positive realized pnl.
        num_trades = len(self.trades)
        closed_trades = [t for t in self.trades if t['type'] == 'CLOSE']
        winning_trades = sum(1 for t in closed_trades if t['pnl'] > 0)
        win_rate = winning_trades / len(closed_trades) if closed_trades else 0

        sortino_ratio = self._calculate_sortino_ratio(returns)
        calmar_ratio = annualized_return / abs(max_drawdown) if max_drawdown != 0 else 0

        return {
            'results_df': results,
            'total_return': total_return,
            'annualized_return': annualized_return,
            'volatility': volatility,
            'sharpe_ratio': sharpe_ratio,
            'sortino_ratio': sortino_ratio,
            'calmar_ratio': calmar_ratio,
            'max_drawdown': max_drawdown,
            'num_trades': num_trades,
            'win_rate': win_rate,
            'final_portfolio_value': portfolio_values.iloc[-1],
            'trades': self.trades,
        }

    def _calculate_sortino_ratio(self, returns: pd.Series) -> float:
        """Sortino ratio: excess return over downside deviation (3% rf)."""
        excess_returns = returns - 0.03 / 252
        negative_returns = returns[returns < 0]
        if len(negative_returns) == 0:
            return np.inf
        downside_deviation = negative_returns.std() * np.sqrt(252)
        return excess_returns.mean() * np.sqrt(252) / downside_deviation

    def plot_results(self, results: Dict, figsize: Tuple[int, int] = (15, 10)):
        """Plot equity curve, drawdown, return distribution and rolling Sharpe."""
        import matplotlib.pyplot as plt  # lazy: keeps the core headless-safe

        fig, axes = plt.subplots(2, 2, figsize=figsize)
        results_df = results['results_df']

        # Portfolio value over time
        axes[0, 0].plot(results_df.index, results_df['portfolio_value'],
                        label='Portfolio Value', linewidth=2)
        axes[0, 0].axhline(y=self.config.initial_capital, color='r',
                           linestyle='--', alpha=0.7, label='Initial Capital')
        axes[0, 0].set_title('Portfolio Value Over Time')
        axes[0, 0].set_ylabel('Value ($)')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

        # Drawdown
        rolling_max = results_df['portfolio_value'].expanding().max()
        drawdown = (results_df['portfolio_value'] - rolling_max) / rolling_max * 100
        axes[0, 1].fill_between(results_df.index, drawdown, 0,
                                color='red', alpha=0.3)
        axes[0, 1].plot(results_df.index, drawdown, color='red', linewidth=1)
        axes[0, 1].set_title(f'Drawdown (Max: {results["max_drawdown"]:.2%})')
        axes[0, 1].set_ylabel('Drawdown (%)')
        axes[0, 1].grid(True, alpha=0.3)

        # Returns distribution
        axes[1, 0].hist(results_df['returns'] * 100, bins=50, alpha=0.7,
                        edgecolor='black')
        axes[1, 0].axvline(results_df['returns'].mean() * 100, color='red',
                           linestyle='--',
                           label=f'Mean: {results_df["returns"].mean()*100:.3f}%')
        axes[1, 0].set_title('Daily Returns Distribution')
        axes[1, 0].set_xlabel('Daily Return (%)')
        axes[1, 0].set_ylabel('Frequency')
        axes[1, 0].legend()
        axes[1, 0].grid(True, alpha=0.3)

        # Rolling Sharpe ratio (60-day window, not risk-free-adjusted)
        rolling_sharpe = (results_df['returns'].rolling(60).mean() /
                          results_df['returns'].rolling(60).std() * np.sqrt(252))
        axes[1, 1].plot(results_df.index, rolling_sharpe, linewidth=2)
        axes[1, 1].axhline(y=1, color='r', linestyle='--', alpha=0.7, label='Sharpe = 1')
        axes[1, 1].set_title(f'60-Day Rolling Sharpe Ratio (Final: {results["sharpe_ratio"]:.2f})')
        axes[1, 1].set_ylabel('Sharpe Ratio')
        axes[1, 1].legend()
        axes[1, 1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        self._print_performance_summary(results)

    def _print_performance_summary(self, results: Dict):
        """Print a formatted performance summary table."""
        print("=" * 60)
        print("BACKTESTING PERFORMANCE SUMMARY")
        print("=" * 60)
        print(f"Initial Capital: ${self.config.initial_capital:,.2f}")
        print(f"Final Portfolio: ${results['final_portfolio_value']:,.2f}")
        print(f"Total Return: {results['total_return']:.2%}")
        print(f"Annualized Return: {results['annualized_return']:.2%}")
        print(f"Volatility: {results['volatility']:.2%}")
        print(f"Sharpe Ratio: {results['sharpe_ratio']:.2f}")
        print(f"Sortino Ratio: {results['sortino_ratio']:.2f}")
        print(f"Calmar Ratio: {results['calmar_ratio']:.2f}")
        print(f"Maximum Drawdown: {results['max_drawdown']:.2%}")
        print(f"Number of Trades: {results['num_trades']}")
        print(f"Win Rate: {results['win_rate']:.2%}")
        print("=" * 60)
# %% [code]
class StrategyComparator:
    """Run several signal sets through identical backtests and compare them."""

    def __init__(self, data: pd.DataFrame, config: BacktestConfig = None):
        self.data = data
        self.config = config or BacktestConfig()
        self.results = {}

    def add_strategy(self, name: str, signals: pd.Series, signal_strength: pd.Series = None):
        """Backtest one strategy and store its result under ``name``."""
        engine = EnhancedBacktester(self.data, self.config)
        self.results[name] = engine.backtest_strategy(signals, signal_strength)

    def compare_strategies(self) -> pd.DataFrame:
        """Return a formatted comparison table, one row per strategy."""
        rows = [
            {
                'Strategy': name,
                'Total Return': f"{res['total_return']:.2%}",
                'Ann. Return': f"{res['annualized_return']:.2%}",
                'Volatility': f"{res['volatility']:.2%}",
                'Sharpe Ratio': f"{res['sharpe_ratio']:.2f}",
                'Max Drawdown': f"{res['max_drawdown']:.2%}",
                'Num Trades': res['num_trades'],
                'Win Rate': f"{res['win_rate']:.2%}",
            }
            for name, res in self.results.items()
        ]
        return pd.DataFrame(rows)

    def plot_comparison(self, figsize: Tuple[int, int] = (15, 8)):
        """Plot equity curves side by side plus a risk/return scatter."""
        import matplotlib.pyplot as plt  # lazy: keeps the core headless-safe

        fig, axes = plt.subplots(1, 2, figsize=figsize)

        # Left panel: equity curves for every strategy.
        for name, res in self.results.items():
            frame = res['results_df']
            axes[0].plot(frame.index, frame['portfolio_value'], label=name, linewidth=2)
        axes[0].axhline(y=self.config.initial_capital, color='black',
                        linestyle='--', alpha=0.5, label='Initial Capital')
        axes[0].set_title('Portfolio Value Comparison')
        axes[0].set_ylabel('Portfolio Value ($)')
        axes[0].legend()
        axes[0].grid(True, alpha=0.3)

        # Right panel: annualized return vs. volatility, one dot per strategy.
        names = list(self.results.keys())
        vols = [self.results[n]['volatility'] for n in names]
        rets = [self.results[n]['annualized_return'] for n in names]
        axes[1].scatter(vols, rets, s=100, alpha=0.7)
        for label, x, y in zip(names, vols, rets):
            axes[1].annotate(label, (x, y), xytext=(5, 5), textcoords='offset points')
        axes[1].set_xlabel('Volatility')
        axes[1].set_ylabel('Annualized Return')
        axes[1].set_title('Risk-Return Profile')
        axes[1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()
# %% [code]
# Example usage: synthetic random-walk prices driven through an MA crossover.
if __name__ == "__main__":
    np.random.seed(42)
    dates = pd.date_range('2020-01-01', '2023-12-31', freq='D')
    prices = 100 * np.exp(np.cumsum(np.random.randn(len(dates)) * 0.01))

    sample_data = pd.DataFrame({
        'close': prices,
        'volume': np.random.randint(1000, 10000, len(dates)),
    }, index=dates)

    # 10/30 moving-average crossover signals (+1 fast above slow, -1 below,
    # 0 while either average is still NaN during warmup).
    fast = sample_data['close'].rolling(10).mean()
    slow = sample_data['close'].rolling(30).mean()
    signals = pd.Series(
        np.where(fast > slow, 1, np.where(fast < slow, -1, 0)),
        index=sample_data.index,
    )

    config = BacktestConfig(initial_capital=100000, commission=0.001)
    engine = EnhancedBacktester(sample_data, config)
    report = engine.backtest_strategy(signals)
    engine.plot_results(report)


# ---- file: research/portfolio-manager.py ----
"""
Portfolio Management System

Multi-strategy portfolio management with:
- Dynamic allocation
- Risk budgeting
- Correlation management
- Performance attribution
- Rebalancing strategies
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from typing import Dict, List, Optional, Tuple, Union
from dataclasses import dataclass
from scipy.optimize import minimize
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
Target portfolio volatility + max_drawdown_limit: float = 0.15 # Maximum allowed drawdown + transaction_cost: float = 0.001 # Transaction cost for rebalancing + + +class PortfolioManager: + """ + Advanced portfolio management system for multi-strategy allocation. + """ + + def __init__(self, config: PortfolioConfig = None): + self.config = config or PortfolioConfig() + self.strategies = {} + self.weights = {} + self.portfolio_history = [] + self.rebalance_dates = [] + self.transaction_costs = [] + + def add_strategy(self, name: str, returns: pd.Series, + strategy_type: str = "unknown", + benchmark: pd.Series = None): + """Add a strategy to the portfolio""" + self.strategies[name] = { + 'returns': returns, + 'type': strategy_type, + 'benchmark': benchmark, + 'sharpe_ratio': self._calculate_sharpe(returns), + 'volatility': returns.std() * np.sqrt(252), + 'max_drawdown': self._calculate_max_drawdown(returns.cumsum()) + } + + def _calculate_sharpe(self, returns: pd.Series, rf_rate: float = 0.03) -> float: + """Calculate Sharpe ratio""" + excess_returns = returns - rf_rate / 252 + return excess_returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0 + + def _calculate_max_drawdown(self, cumulative_returns: pd.Series) -> float: + """Calculate maximum drawdown""" + peak = cumulative_returns.expanding().max() + drawdown = (cumulative_returns - peak) / peak + return drawdown.min() + + def calculate_correlation_matrix(self, lookback_days: int = 252) -> pd.DataFrame: + """Calculate correlation matrix of strategy returns""" + strategy_names = list(self.strategies.keys()) + returns_df = pd.DataFrame() + + for name in strategy_names: + returns_df[name] = self.strategies[name]['returns'] + + # Use rolling correlation if specified + if lookback_days: + correlation_matrix = returns_df.tail(lookback_days).corr() + else: + correlation_matrix = returns_df.corr() + + return correlation_matrix + + def optimize_weights_equal_risk(self, returns_df: pd.DataFrame) -> 
np.ndarray: + """Equal risk contribution optimization""" + n_assets = len(returns_df.columns) + cov_matrix = returns_df.cov() * 252 # Annualized covariance + + def risk_budget_objective(weights, cov_matrix): + """Objective function for equal risk contribution""" + portfolio_vol = np.sqrt(np.dot(weights, np.dot(cov_matrix, weights))) + marginal_contrib = np.dot(cov_matrix, weights) / portfolio_vol + contrib = weights * marginal_contrib + target_contrib = portfolio_vol / n_assets + return np.sum((contrib - target_contrib) ** 2) + + # Constraints + constraints = [ + {'type': 'eq', 'fun': lambda x: np.sum(x) - 1.0}, # Weights sum to 1 + ] + + # Bounds + bounds = tuple((self.config.min_strategy_weight, self.config.max_strategy_weight) + for _ in range(n_assets)) + + # Initial guess + x0 = np.array([1.0 / n_assets] * n_assets) + + # Optimize + result = minimize(risk_budget_objective, x0, + args=(cov_matrix,), method='SLSQP', + bounds=bounds, constraints=constraints) + + return result.x if result.success else x0 + + def optimize_weights_mean_variance(self, returns_df: pd.DataFrame, + target_return: float = None) -> np.ndarray: + """Mean-variance optimization""" + n_assets = len(returns_df.columns) + mean_returns = returns_df.mean() * 252 # Annualized returns + cov_matrix = returns_df.cov() * 252 # Annualized covariance + + if target_return is None: + target_return = mean_returns.mean() + + def portfolio_variance(weights, cov_matrix): + return np.dot(weights, np.dot(cov_matrix, weights)) + + # Constraints + constraints = [ + {'type': 'eq', 'fun': lambda x: np.sum(x) - 1.0}, # Weights sum to 1 + {'type': 'eq', 'fun': lambda x: np.dot(x, mean_returns) - target_return} # Target return + ] + + # Bounds + bounds = tuple((self.config.min_strategy_weight, self.config.max_strategy_weight) + for _ in range(n_assets)) + + # Initial guess + x0 = np.array([1.0 / n_assets] * n_assets) + + # Optimize + result = minimize(portfolio_variance, x0, + args=(cov_matrix,), method='SLSQP', + 
bounds=bounds, constraints=constraints) + + return result.x if result.success else x0 + + def optimize_weights_max_diversification(self, returns_df: pd.DataFrame) -> np.ndarray: + """Maximum diversification optimization""" + n_assets = len(returns_df.columns) + volatilities = returns_df.std() * np.sqrt(252) # Annualized volatilities + cov_matrix = returns_df.cov() * 252 + + def diversification_ratio(weights, volatilities, cov_matrix): + """Diversification ratio to maximize""" + weighted_vol = np.dot(weights, volatilities) + portfolio_vol = np.sqrt(np.dot(weights, np.dot(cov_matrix, weights))) + return -weighted_vol / portfolio_vol # Negative for maximization + + # Constraints + constraints = [ + {'type': 'eq', 'fun': lambda x: np.sum(x) - 1.0}, # Weights sum to 1 + ] + + # Bounds + bounds = tuple((self.config.min_strategy_weight, self.config.max_strategy_weight) + for _ in range(n_assets)) + + # Initial guess + x0 = np.array([1.0 / n_assets] * n_assets) + + # Optimize + result = minimize(diversification_ratio, x0, + args=(volatilities, cov_matrix), method='SLSQP', + bounds=bounds, constraints=constraints) + + return result.x if result.success else x0 + + def calculate_optimal_weights(self, rebalance_date: pd.Timestamp, + lookback_days: int = 252) -> Dict[str, float]: + """Calculate optimal portfolio weights""" + strategy_names = list(self.strategies.keys()) + + if not strategy_names: + return {} + + # Create returns DataFrame for optimization + returns_df = pd.DataFrame() + for name in strategy_names: + strategy_returns = self.strategies[name]['returns'] + # Get returns up to rebalance date + available_returns = strategy_returns[strategy_returns.index <= rebalance_date] + if len(available_returns) >= lookback_days: + returns_df[name] = available_returns.tail(lookback_days) + + if returns_df.empty or len(returns_df.columns) == 0: + # Equal weights as fallback + equal_weight = 1.0 / len(strategy_names) + return {name: equal_weight for name in strategy_names} + + # 
Remove strategies with insufficient data + valid_strategies = returns_df.columns.tolist() + returns_df = returns_df.dropna() + + if len(returns_df) < 60: # Minimum data requirement + equal_weight = 1.0 / len(valid_strategies) + return {name: equal_weight for name in valid_strategies} + + # Check correlations and remove highly correlated strategies + corr_matrix = returns_df.corr() + to_remove = set() + + for i in range(len(corr_matrix.columns)): + for j in range(i + 1, len(corr_matrix.columns)): + if abs(corr_matrix.iloc[i, j]) > self.config.max_correlation: + # Remove strategy with lower Sharpe ratio + strategy1 = corr_matrix.columns[i] + strategy2 = corr_matrix.columns[j] + + sharpe1 = self.strategies[strategy1]['sharpe_ratio'] + sharpe2 = self.strategies[strategy2]['sharpe_ratio'] + + if sharpe1 < sharpe2: + to_remove.add(strategy1) + else: + to_remove.add(strategy2) + + # Remove highly correlated strategies + final_strategies = [s for s in valid_strategies if s not in to_remove] + if not final_strategies: + final_strategies = valid_strategies[:1] # Keep at least one strategy + + returns_df = returns_df[final_strategies] + + # Optimize weights based on method + if self.config.risk_budget_method == 'equal_weight': + weights = np.array([1.0 / len(final_strategies)] * len(final_strategies)) + elif self.config.risk_budget_method == 'equal_risk': + weights = self.optimize_weights_equal_risk(returns_df) + elif self.config.risk_budget_method == 'mean_variance': + weights = self.optimize_weights_mean_variance(returns_df) + elif self.config.risk_budget_method == 'max_diversification': + weights = self.optimize_weights_max_diversification(returns_df) + else: + weights = np.array([1.0 / len(final_strategies)] * len(final_strategies)) + + # Create weights dictionary + weight_dict = {} + for i, strategy in enumerate(final_strategies): + weight_dict[strategy] = weights[i] + + # Add zero weights for removed strategies + for strategy in strategy_names: + if strategy not in 
weight_dict: + weight_dict[strategy] = 0.0 + + return weight_dict + + def get_rebalance_dates(self, start_date: pd.Timestamp, + end_date: pd.Timestamp) -> List[pd.Timestamp]: + """Get rebalancing dates based on frequency""" + dates = [] + current_date = start_date + + if self.config.rebalance_frequency == 'daily': + dates = pd.date_range(start_date, end_date, freq='D').tolist() + elif self.config.rebalance_frequency == 'weekly': + dates = pd.date_range(start_date, end_date, freq='W').tolist() + elif self.config.rebalance_frequency == 'monthly': + dates = pd.date_range(start_date, end_date, freq='M').tolist() + elif self.config.rebalance_frequency == 'quarterly': + dates = pd.date_range(start_date, end_date, freq='Q').tolist() + + return [pd.Timestamp(date) for date in dates] + + def calculate_transaction_costs(self, old_weights: Dict[str, float], + new_weights: Dict[str, float], + portfolio_value: float) -> float: + """Calculate transaction costs for rebalancing""" + total_turnover = 0.0 + + for strategy in set(list(old_weights.keys()) + list(new_weights.keys())): + old_weight = old_weights.get(strategy, 0.0) + new_weight = new_weights.get(strategy, 0.0) + total_turnover += abs(new_weight - old_weight) + + return total_turnover * portfolio_value * self.config.transaction_cost + + def backtest_portfolio(self, start_date: pd.Timestamp = None, + end_date: pd.Timestamp = None) -> Dict: + """Backtest the multi-strategy portfolio""" + if not self.strategies: + raise ValueError("No strategies added to portfolio") + + # Get common date range + all_dates = set() + for strategy in self.strategies.values(): + all_dates.update(strategy['returns'].index) + + all_dates = sorted(list(all_dates)) + + if start_date is None: + start_date = pd.Timestamp(all_dates[252]) # Skip first year for warmup + if end_date is None: + end_date = pd.Timestamp(all_dates[-1]) + + # Filter dates + backtest_dates = [date for date in all_dates if start_date <= date <= end_date] + + # Get rebalancing 
dates + rebalance_dates = self.get_rebalance_dates(start_date, end_date) + rebalance_dates = [date for date in rebalance_dates if date in backtest_dates] + + # Initialize portfolio + portfolio_value = self.config.initial_capital + current_weights = {} + portfolio_returns = [] + portfolio_values = [portfolio_value] + weights_history = [] + transaction_costs_history = [] + + for i, date in enumerate(backtest_dates): + # Check if rebalancing is needed + if date in rebalance_dates or not current_weights: + old_weights = current_weights.copy() + new_weights = self.calculate_optimal_weights(date) + + # Calculate transaction costs + if old_weights: + transaction_cost = self.calculate_transaction_costs( + old_weights, new_weights, portfolio_value + ) + portfolio_value -= transaction_cost + transaction_costs_history.append(transaction_cost) + else: + transaction_costs_history.append(0.0) + + current_weights = new_weights + weights_history.append((date, current_weights.copy())) + + # Calculate portfolio return for this period + portfolio_return = 0.0 + for strategy_name, weight in current_weights.items(): + if weight > 0: + strategy_returns = self.strategies[strategy_name]['returns'] + if date in strategy_returns.index: + strategy_return = strategy_returns[date] + portfolio_return += weight * strategy_return + + # Update portfolio value + portfolio_value *= (1 + portfolio_return) + portfolio_returns.append(portfolio_return) + portfolio_values.append(portfolio_value) + + # Create results DataFrame + results_df = pd.DataFrame({ + 'portfolio_value': portfolio_values[1:], # Skip initial value + 'portfolio_returns': portfolio_returns + }, index=backtest_dates) + + # Calculate performance metrics + portfolio_returns_series = pd.Series(portfolio_returns, index=backtest_dates) + + performance_metrics = self._calculate_portfolio_metrics( + results_df, portfolio_returns_series + ) + + return { + 'results_df': results_df, + 'performance_metrics': performance_metrics, + 
'weights_history': weights_history, + 'transaction_costs': sum(transaction_costs_history), + 'rebalance_dates': rebalance_dates + } + + def _calculate_portfolio_metrics(self, results_df: pd.DataFrame, + returns: pd.Series) -> Dict: + """Calculate comprehensive portfolio performance metrics""" + portfolio_values = results_df['portfolio_value'] + + # Basic metrics + total_return = (portfolio_values.iloc[-1] / self.config.initial_capital) - 1 + annualized_return = (1 + total_return) ** (252 / len(returns)) - 1 + + # Risk metrics + volatility = returns.std() * np.sqrt(252) + sharpe_ratio = (annualized_return - 0.03) / volatility if volatility > 0 else 0 + + # Drawdown analysis + rolling_max = portfolio_values.expanding().max() + drawdown = (portfolio_values - rolling_max) / rolling_max + max_drawdown = drawdown.min() + + # Additional metrics + sortino_ratio = self._calculate_sortino_ratio(returns) + calmar_ratio = annualized_return / abs(max_drawdown) if max_drawdown != 0 else 0 + + # VaR and CVaR + var_95 = returns.quantile(0.05) + cvar_95 = returns[returns <= var_95].mean() + + return { + 'total_return': total_return, + 'annualized_return': annualized_return, + 'volatility': volatility, + 'sharpe_ratio': sharpe_ratio, + 'sortino_ratio': sortino_ratio, + 'calmar_ratio': calmar_ratio, + 'max_drawdown': max_drawdown, + 'var_95': var_95, + 'cvar_95': cvar_95, + 'final_portfolio_value': portfolio_values.iloc[-1] + } + + def _calculate_sortino_ratio(self, returns: pd.Series) -> float: + """Calculate Sortino ratio""" + excess_returns = returns - 0.03/252 + negative_returns = returns[returns < 0] + if len(negative_returns) == 0: + return np.inf + downside_deviation = negative_returns.std() * np.sqrt(252) + return excess_returns.mean() * np.sqrt(252) / downside_deviation + + def plot_portfolio_performance(self, backtest_results: Dict, + figsize: Tuple[int, int] = (15, 12)): + """Plot comprehensive portfolio performance analysis""" + results_df = backtest_results['results_df'] 
+ weights_history = backtest_results['weights_history'] + metrics = backtest_results['performance_metrics'] + + fig, axes = plt.subplots(2, 2, figsize=figsize) + + # Portfolio value over time + axes[0, 0].plot(results_df.index, results_df['portfolio_value'], + linewidth=2, label='Portfolio Value') + axes[0, 0].axhline(y=self.config.initial_capital, color='r', + linestyle='--', alpha=0.7, label='Initial Capital') + axes[0, 0].set_title('Portfolio Value Over Time') + axes[0, 0].set_ylabel('Value ($)') + axes[0, 0].legend() + axes[0, 0].grid(True, alpha=0.3) + + # Drawdown + rolling_max = results_df['portfolio_value'].expanding().max() + drawdown = (results_df['portfolio_value'] - rolling_max) / rolling_max * 100 + axes[0, 1].fill_between(results_df.index, drawdown, 0, + color='red', alpha=0.3) + axes[0, 1].plot(results_df.index, drawdown, color='red', linewidth=1) + axes[0, 1].set_title(f'Drawdown (Max: {metrics["max_drawdown"]:.2%})') + axes[0, 1].set_ylabel('Drawdown (%)') + axes[0, 1].grid(True, alpha=0.3) + + # Returns distribution + axes[1, 0].hist(results_df['portfolio_returns'] * 100, bins=50, + alpha=0.7, edgecolor='black') + axes[1, 0].axvline(results_df['portfolio_returns'].mean() * 100, + color='red', linestyle='--', + label=f'Mean: {results_df["portfolio_returns"].mean()*100:.3f}%') + axes[1, 0].set_title('Daily Returns Distribution') + axes[1, 0].set_xlabel('Daily Return (%)') + axes[1, 0].set_ylabel('Frequency') + axes[1, 0].legend() + axes[1, 0].grid(True, alpha=0.3) + + # Strategy weights over time + if weights_history: + strategy_names = list(self.strategies.keys()) + weight_dates = [item[0] for item in weights_history] + + for strategy in strategy_names: + weights = [item[1].get(strategy, 0) for item in weights_history] + axes[1, 1].plot(weight_dates, weights, marker='o', + label=strategy, alpha=0.7) + + axes[1, 1].set_title('Strategy Weights Over Time') + axes[1, 1].set_ylabel('Weight') + axes[1, 1].legend(bbox_to_anchor=(1.05, 1), loc='upper 
left') + axes[1, 1].grid(True, alpha=0.3) + + plt.tight_layout() + plt.show() + + # Print performance summary + self._print_portfolio_summary(metrics, backtest_results) + + def _print_portfolio_summary(self, metrics: Dict, backtest_results: Dict): + """Print formatted portfolio performance summary""" + print("=" * 70) + print("MULTI-STRATEGY PORTFOLIO PERFORMANCE SUMMARY") + print("=" * 70) + print(f"Initial Capital: ${self.config.initial_capital:,.2f}") + print(f"Final Portfolio Value: ${metrics['final_portfolio_value']:,.2f}") + print(f"Total Return: {metrics['total_return']:.2%}") + print(f"Annualized Return: {metrics['annualized_return']:.2%}") + print(f"Volatility: {metrics['volatility']:.2%}") + print(f"Sharpe Ratio: {metrics['sharpe_ratio']:.2f}") + print(f"Sortino Ratio: {metrics['sortino_ratio']:.2f}") + print(f"Calmar Ratio: {metrics['calmar_ratio']:.2f}") + print(f"Maximum Drawdown: {metrics['max_drawdown']:.2%}") + print(f"VaR (95%): {metrics['var_95']:.2%}") + print(f"CVaR (95%): {metrics['cvar_95']:.2%}") + print(f"Transaction Costs: ${backtest_results['transaction_costs']:,.2f}") + print(f"Number of Rebalances: {len(backtest_results['rebalance_dates'])}") + print("=" * 70) + + # Strategy contribution analysis + print("\nSTRATEGY ANALYSIS:") + print("-" * 40) + for name, strategy in self.strategies.items(): + print(f"{name}:") + print(f" Sharpe Ratio: {strategy['sharpe_ratio']:.2f}") + print(f" Volatility: {strategy['volatility']:.2%}") + print(f" Max Drawdown: {strategy['max_drawdown']:.2%}") + print() + + +# Example usage +if __name__ == "__main__": + # Generate sample strategy returns + np.random.seed(42) + dates = pd.date_range('2020-01-01', '2023-12-31', freq='D') + + # Create different strategy return patterns + trend_following = np.random.randn(len(dates)) * 0.01 + 0.0003 # Slight positive drift + mean_reversion = np.random.randn(len(dates)) * 0.008 + 0.0002 + momentum = np.random.randn(len(dates)) * 0.012 + 0.0004 + volatility = 
np.random.randn(len(dates)) * 0.015 + 0.0001 + + # Add some correlation structure + common_factor = np.random.randn(len(dates)) * 0.005 + trend_following += 0.3 * common_factor + momentum += 0.4 * common_factor + + # Convert to pandas Series + strategy_returns = { + 'Trend Following': pd.Series(trend_following, index=dates), + 'Mean Reversion': pd.Series(mean_reversion, index=dates), + 'Momentum': pd.Series(momentum, index=dates), + 'Volatility': pd.Series(volatility, index=dates) + } + + # Create portfolio manager + config = PortfolioConfig( + initial_capital=1000000, + rebalance_frequency='monthly', + risk_budget_method='equal_risk', + max_strategy_weight=0.6, + min_strategy_weight=0.1 + ) + + portfolio_manager = PortfolioManager(config) + + # Add strategies + for name, returns in strategy_returns.items(): + portfolio_manager.add_strategy(name, returns, name.lower().replace(' ', '_')) + + # Backtest portfolio + backtest_results = portfolio_manager.backtest_portfolio() + + # Display results + portfolio_manager.plot_portfolio_performance(backtest_results) \ No newline at end of file diff --git a/research/strategy-optimizer.py b/research/strategy-optimizer.py new file mode 100644 index 0000000..cf665a9 --- /dev/null +++ b/research/strategy-optimizer.py @@ -0,0 +1,733 @@ +""" +Strategy Optimization and Walk-Forward Analysis + +Advanced parameter optimization system: +- Grid search and random search +- Bayesian optimization +- Genetic algorithms +- Walk-forward analysis +- Monte Carlo simulation +- Overfitting detection +""" + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from typing import Dict, List, Optional, Tuple, Any, Callable, Union +from dataclasses import dataclass, field +from itertools import product +import warnings +from concurrent.futures import ProcessPoolExecutor, as_completed +from scipy.optimize import minimize +from sklearn.model_selection import ParameterGrid, ParameterSampler +from sklearn.gaussian_process import 
GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C +import optuna +from datetime import datetime, timedelta +import pickle +import json + +warnings.filterwarnings('ignore') + + +@dataclass +class OptimizationConfig: + """Configuration for strategy optimization""" + # Optimization method + method: str = 'grid_search' # 'grid_search', 'random_search', 'bayesian', 'genetic', 'optuna' + + # Search parameters + max_iterations: int = 100 + n_random_starts: int = 10 + cv_folds: int = 5 + + # Objective function + objective_metric: str = 'sharpe_ratio' # 'sharpe_ratio', 'calmar_ratio', 'sortino_ratio', 'total_return' + maximize: bool = True + + # Walk-forward analysis + training_window: int = 252 # Trading days for training + testing_window: int = 63 # Trading days for testing + step_size: int = 21 # Days to step forward + min_trades: int = 10 # Minimum trades required + + # Overfitting detection + max_sharpe_threshold: float = 3.0 # Flag potentially overfit strategies + consistency_threshold: float = 0.7 # Minimum consistency across periods + + # Parallel processing + n_jobs: int = -1 # Number of parallel jobs + + # Storage + save_results: bool = True + results_directory: str = './optimization_results' + + +class ParameterSpace: + """Define parameter search space""" + + def __init__(self): + self.parameters = {} + self.constraints = [] + + def add_parameter(self, name: str, param_type: str, **kwargs): + """Add a parameter to the search space""" + self.parameters[name] = { + 'type': param_type, + **kwargs + } + + def add_constraint(self, constraint_func: Callable): + """Add a constraint function""" + self.constraints.append(constraint_func) + + def generate_grid(self) -> List[Dict]: + """Generate parameter grid for grid search""" + param_lists = {} + + for name, param_info in self.parameters.items(): + if param_info['type'] == 'discrete': + param_lists[name] = param_info['values'] + elif param_info['type'] == 'continuous': + 
start, end, step = param_info['min'], param_info['max'], param_info.get('step', 0.1) + param_lists[name] = np.arange(start, end + step, step).tolist() + elif param_info['type'] == 'integer': + start, end = param_info['min'], param_info['max'] + param_lists[name] = list(range(start, end + 1)) + + # Generate all combinations + grid = list(ParameterGrid(param_lists)) + + # Apply constraints + if self.constraints: + filtered_grid = [] + for params in grid: + if all(constraint(params) for constraint in self.constraints): + filtered_grid.append(params) + return filtered_grid + + return grid + + def sample_random(self, n_samples: int) -> List[Dict]: + """Generate random parameter samples""" + samples = [] + + for _ in range(n_samples): + sample = {} + + for name, param_info in self.parameters.items(): + if param_info['type'] == 'discrete': + sample[name] = np.random.choice(param_info['values']) + elif param_info['type'] == 'continuous': + sample[name] = np.random.uniform(param_info['min'], param_info['max']) + elif param_info['type'] == 'integer': + sample[name] = np.random.randint(param_info['min'], param_info['max'] + 1) + + # Check constraints + if not self.constraints or all(constraint(sample) for constraint in self.constraints): + samples.append(sample) + + return samples + + +class ObjectiveFunction: + """Objective function for optimization""" + + def __init__(self, strategy_func: Callable, backtest_func: Callable, + data: pd.DataFrame, config: OptimizationConfig): + self.strategy_func = strategy_func + self.backtest_func = backtest_func + self.data = data + self.config = config + self.evaluation_cache = {} + + def evaluate(self, parameters: Dict) -> float: + """Evaluate strategy with given parameters""" + # Create cache key + cache_key = tuple(sorted(parameters.items())) + + if cache_key in self.evaluation_cache: + return self.evaluation_cache[cache_key] + + try: + # Generate signals with parameters + strategy = self.strategy_func(parameters) + signals = 
strategy.generate_detailed_signals(self.data) + + if signals is None or 'signal' not in signals.columns: + return -np.inf if self.config.maximize else np.inf + + # Backtest strategy + results = self.backtest_func(self.data, signals['signal']) + + if not results or 'performance_metrics' not in results: + return -np.inf if self.config.maximize else np.inf + + # Extract objective metric + metrics = results['performance_metrics'] + objective_value = metrics.get(self.config.objective_metric, 0) + + # Apply constraints + num_trades = metrics.get('num_trades', 0) + if num_trades < self.config.min_trades: + objective_value = -np.inf if self.config.maximize else np.inf + + # Check for overfitting (unrealistic Sharpe ratios) + if (self.config.objective_metric == 'sharpe_ratio' and + objective_value > self.config.max_sharpe_threshold): + objective_value = self.config.max_sharpe_threshold + + # Cache result + self.evaluation_cache[cache_key] = objective_value + + return objective_value + + except Exception as e: + print(f"Error evaluating parameters {parameters}: {e}") + return -np.inf if self.config.maximize else np.inf + + +class GridSearchOptimizer: + """Grid search optimization""" + + def __init__(self, objective_func: ObjectiveFunction, config: OptimizationConfig): + self.objective_func = objective_func + self.config = config + + def optimize(self, parameter_space: ParameterSpace) -> Dict: + """Run grid search optimization""" + grid = parameter_space.generate_grid() + print(f"Grid search: evaluating {len(grid)} parameter combinations") + + results = [] + + if self.config.n_jobs == 1: + # Sequential execution + for i, params in enumerate(grid): + score = self.objective_func.evaluate(params) + results.append({ + 'parameters': params, + 'score': score, + 'iteration': i + }) + + if (i + 1) % 10 == 0: + print(f"Completed {i + 1}/{len(grid)} evaluations") + else: + # Parallel execution + with ProcessPoolExecutor(max_workers=self.config.n_jobs) as executor: + future_to_params = 
{ + executor.submit(self.objective_func.evaluate, params): (i, params) + for i, params in enumerate(grid) + } + + for future in as_completed(future_to_params): + i, params = future_to_params[future] + try: + score = future.result() + results.append({ + 'parameters': params, + 'score': score, + 'iteration': i + }) + except Exception as exc: + print(f"Parameter evaluation generated an exception: {exc}") + results.append({ + 'parameters': params, + 'score': -np.inf if self.config.maximize else np.inf, + 'iteration': i + }) + + if len(results) % 10 == 0: + print(f"Completed {len(results)}/{len(grid)} evaluations") + + # Sort results + results.sort(key=lambda x: x['score'], reverse=self.config.maximize) + + return { + 'best_parameters': results[0]['parameters'], + 'best_score': results[0]['score'], + 'all_results': results, + 'method': 'grid_search' + } + + +class BayesianOptimizer: + """Bayesian optimization using Gaussian Process""" + + def __init__(self, objective_func: ObjectiveFunction, config: OptimizationConfig): + self.objective_func = objective_func + self.config = config + + def optimize(self, parameter_space: ParameterSpace) -> Dict: + """Run Bayesian optimization""" + # Convert parameter space to bounds + bounds = [] + param_names = [] + + for name, param_info in parameter_space.parameters.items(): + if param_info['type'] in ['continuous', 'integer']: + bounds.append((param_info['min'], param_info['max'])) + param_names.append(name) + + if not bounds: + raise ValueError("Bayesian optimization requires continuous or integer parameters") + + # Initialize with random samples + X_init = [] + y_init = [] + + for _ in range(self.config.n_random_starts): + params = {} + for i, name in enumerate(param_names): + param_info = parameter_space.parameters[name] + if param_info['type'] == 'continuous': + params[name] = np.random.uniform(bounds[i][0], bounds[i][1]) + else: # integer + params[name] = np.random.randint(bounds[i][0], bounds[i][1] + 1) + + # Add discrete 
parameters if any + for name, param_info in parameter_space.parameters.items(): + if param_info['type'] == 'discrete': + params[name] = np.random.choice(param_info['values']) + + score = self.objective_func.evaluate(params) + X_init.append([params[name] for name in param_names]) + y_init.append(score) + + X_init = np.array(X_init) + y_init = np.array(y_init) + + # Gaussian Process + kernel = C(1.0, (1e-3, 1e3)) * RBF(1.0, (1e-2, 1e2)) + gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9) + + best_params = None + best_score = -np.inf if self.config.maximize else np.inf + all_results = [] + + for iteration in range(self.config.max_iterations - self.config.n_random_starts): + # Fit GP + gp.fit(X_init, y_init) + + # Acquisition function (Expected Improvement) + def acquisition(x): + x = x.reshape(1, -1) + mu, sigma = gp.predict(x, return_std=True) + + if self.config.maximize: + improvement = mu - np.max(y_init) + Z = improvement / sigma + ei = improvement * stats.norm.cdf(Z) + sigma * stats.norm.pdf(Z) + return -ei # Minimize negative EI + else: + improvement = np.min(y_init) - mu + Z = improvement / sigma + ei = improvement * stats.norm.cdf(Z) + sigma * stats.norm.pdf(Z) + return -ei + + # Optimize acquisition function + best_x = None + best_acq = np.inf + + for _ in range(100): # Multiple random starts + x0 = np.array([np.random.uniform(b[0], b[1]) for b in bounds]) + res = minimize(acquisition, x0, bounds=bounds, method='L-BFGS-B') + + if res.fun < best_acq: + best_acq = res.fun + best_x = res.x + + # Convert back to parameter dict + next_params = {} + for i, name in enumerate(param_names): + param_info = parameter_space.parameters[name] + if param_info['type'] == 'integer': + next_params[name] = int(round(best_x[i])) + else: + next_params[name] = best_x[i] + + # Add discrete parameters + for name, param_info in parameter_space.parameters.items(): + if param_info['type'] == 'discrete': + next_params[name] = np.random.choice(param_info['values']) + + 
# Evaluate + next_score = self.objective_func.evaluate(next_params) + + # Update data + X_init = np.vstack([X_init, [next_params[name] for name in param_names]]) + y_init = np.append(y_init, next_score) + + # Update best + if (self.config.maximize and next_score > best_score) or \ + (not self.config.maximize and next_score < best_score): + best_score = next_score + best_params = next_params.copy() + + all_results.append({ + 'parameters': next_params, + 'score': next_score, + 'iteration': iteration + self.config.n_random_starts + }) + + if (iteration + 1) % 10 == 0: + print(f"Bayesian optimization: {iteration + 1}/{self.config.max_iterations - self.config.n_random_starts} iterations") + + return { + 'best_parameters': best_params, + 'best_score': best_score, + 'all_results': all_results, + 'method': 'bayesian' + } + + +class OptunaOptimizer: + """Optuna-based optimization""" + + def __init__(self, objective_func: ObjectiveFunction, config: OptimizationConfig): + self.objective_func = objective_func + self.config = config + + def optimize(self, parameter_space: ParameterSpace) -> Dict: + """Run Optuna optimization""" + def objective(trial): + params = {} + + for name, param_info in parameter_space.parameters.items(): + if param_info['type'] == 'continuous': + params[name] = trial.suggest_float(name, param_info['min'], param_info['max']) + elif param_info['type'] == 'integer': + params[name] = trial.suggest_int(name, param_info['min'], param_info['max']) + elif param_info['type'] == 'discrete': + params[name] = trial.suggest_categorical(name, param_info['values']) + + score = self.objective_func.evaluate(params) + + # Optuna maximizes by default, so negate if we want to minimize + return score if self.config.maximize else -score + + # Create study + direction = 'maximize' if self.config.maximize else 'minimize' + study = optuna.create_study(direction=direction) + + # Optimize + study.optimize(objective, n_trials=self.config.max_iterations) + + # Extract results + 
all_results = [] + for trial in study.trials: + score = trial.value + if not self.config.maximize: + score = -score # Convert back to original scale + + all_results.append({ + 'parameters': trial.params, + 'score': score, + 'iteration': trial.number + }) + + best_score = study.best_value + if not self.config.maximize: + best_score = -best_score + + return { + 'best_parameters': study.best_params, + 'best_score': best_score, + 'all_results': all_results, + 'method': 'optuna' + } + + +class WalkForwardAnalyzer: + """Walk-forward analysis for strategy validation""" + + def __init__(self, strategy_func: Callable, backtest_func: Callable, + config: OptimizationConfig): + self.strategy_func = strategy_func + self.backtest_func = backtest_func + self.config = config + + def run_analysis(self, data: pd.DataFrame, parameter_space: ParameterSpace) -> Dict: + """Run walk-forward analysis""" + results = [] + optimization_results = [] + + # Create time windows + data_length = len(data) + start_idx = self.config.training_window + + while start_idx + self.config.testing_window < data_length: + # Define training and testing periods + train_start = start_idx - self.config.training_window + train_end = start_idx + test_start = start_idx + test_end = min(start_idx + self.config.testing_window, data_length) + + train_data = data.iloc[train_start:train_end] + test_data = data.iloc[test_start:test_end] + + print(f"Walk-forward period: {train_data.index[0]} to {test_data.index[-1]}") + + # Optimize on training data + objective_func = ObjectiveFunction( + self.strategy_func, self.backtest_func, train_data, self.config + ) + + # Use grid search for walk-forward (faster) + optimizer = GridSearchOptimizer(objective_func, self.config) + opt_result = optimizer.optimize(parameter_space) + + optimization_results.append({ + 'period': (train_data.index[0], train_data.index[-1]), + 'best_parameters': opt_result['best_parameters'], + 'best_score': opt_result['best_score'] + }) + + # Test on 
out-of-sample data + best_params = opt_result['best_parameters'] + strategy = self.strategy_func(best_params) + test_signals = strategy.generate_detailed_signals(test_data) + + if test_signals is not None and 'signal' in test_signals.columns: + test_results = self.backtest_func(test_data, test_signals['signal']) + + if test_results and 'performance_metrics' in test_results: + metrics = test_results['performance_metrics'] + + results.append({ + 'period': (test_data.index[0], test_data.index[-1]), + 'parameters': best_params, + 'metrics': metrics, + 'in_sample_score': opt_result['best_score'], + 'out_of_sample_score': metrics.get(self.config.objective_metric, 0) + }) + + # Move to next period + start_idx += self.config.step_size + + return self._analyze_walk_forward_results(results, optimization_results) + + def _analyze_walk_forward_results(self, results: List[Dict], + optimization_results: List[Dict]) -> Dict: + """Analyze walk-forward results""" + if not results: + return {'error': 'No valid walk-forward results'} + + # Extract metrics + in_sample_scores = [r['in_sample_score'] for r in results] + out_of_sample_scores = [r['out_of_sample_score'] for r in results] + + # Calculate statistics + is_mean = np.mean(in_sample_scores) + oos_mean = np.mean(out_of_sample_scores) + is_std = np.std(in_sample_scores) + oos_std = np.std(out_of_sample_scores) + + # Overfitting metrics + degradation = (is_mean - oos_mean) / abs(is_mean) if is_mean != 0 else 0 + consistency = np.corrcoef(in_sample_scores, out_of_sample_scores)[0, 1] + + # Stability metrics + parameter_stability = self._calculate_parameter_stability(optimization_results) + + # Overall assessment + is_robust = ( + degradation < 0.3 and # Less than 30% degradation + consistency > self.config.consistency_threshold and # Good consistency + oos_std / abs(oos_mean) < 2.0 if oos_mean != 0 else False # Reasonable stability + ) + + return { + 'periods': len(results), + 'in_sample_mean': is_mean, + 'out_of_sample_mean': 
oos_mean, + 'in_sample_std': is_std, + 'out_of_sample_std': oos_std, + 'degradation': degradation, + 'consistency': consistency, + 'parameter_stability': parameter_stability, + 'is_robust': is_robust, + 'detailed_results': results, + 'optimization_history': optimization_results + } + + def _calculate_parameter_stability(self, optimization_results: List[Dict]) -> float: + """Calculate parameter stability across periods""" + if len(optimization_results) < 2: + return 1.0 + + # Get all parameter names + all_params = set() + for result in optimization_results: + all_params.update(result['best_parameters'].keys()) + + # Calculate coefficient of variation for each parameter + param_stability = {} + + for param in all_params: + values = [] + for result in optimization_results: + if param in result['best_parameters']: + values.append(result['best_parameters'][param]) + + if len(values) > 1 and np.std(values) > 0: + cv = np.std(values) / abs(np.mean(values)) if np.mean(values) != 0 else np.inf + param_stability[param] = 1 / (1 + cv) # Higher is more stable + else: + param_stability[param] = 1.0 + + return np.mean(list(param_stability.values())) + + +class StrategyOptimizer: + """Main strategy optimization class""" + + def __init__(self, config: OptimizationConfig = None): + self.config = config or OptimizationConfig() + + def optimize_strategy(self, strategy_func: Callable, backtest_func: Callable, + data: pd.DataFrame, parameter_space: ParameterSpace) -> Dict: + """Optimize strategy parameters""" + objective_func = ObjectiveFunction(strategy_func, backtest_func, data, self.config) + + if self.config.method == 'grid_search': + optimizer = GridSearchOptimizer(objective_func, self.config) + elif self.config.method == 'bayesian': + optimizer = BayesianOptimizer(objective_func, self.config) + elif self.config.method == 'optuna': + optimizer = OptunaOptimizer(objective_func, self.config) + else: + raise ValueError(f"Unsupported optimization method: {self.config.method}") + + 
results = optimizer.optimize(parameter_space) + + # Add walk-forward analysis if requested + if hasattr(self.config, 'run_walk_forward') and self.config.run_walk_forward: + wf_analyzer = WalkForwardAnalyzer(strategy_func, backtest_func, self.config) + wf_results = wf_analyzer.run_analysis(data, parameter_space) + results['walk_forward'] = wf_results + + return results + + def plot_optimization_results(self, results: Dict, figsize: Tuple[int, int] = (15, 10)): + """Plot optimization results""" + if 'all_results' not in results: + print("No detailed results to plot") + return + + all_results = results['all_results'] + scores = [r['score'] for r in all_results] + iterations = [r['iteration'] for r in all_results] + + fig, axes = plt.subplots(2, 2, figsize=figsize) + + # Optimization progress + axes[0, 0].plot(iterations, scores, 'b-', alpha=0.7) + axes[0, 0].axhline(y=results['best_score'], color='r', linestyle='--', + label=f"Best: {results['best_score']:.3f}") + axes[0, 0].set_title('Optimization Progress') + axes[0, 0].set_xlabel('Iteration') + axes[0, 0].set_ylabel(f'{self.config.objective_metric.title()}') + axes[0, 0].legend() + axes[0, 0].grid(True, alpha=0.3) + + # Score distribution + axes[0, 1].hist(scores, bins=30, alpha=0.7, edgecolor='black') + axes[0, 1].axvline(results['best_score'], color='r', linestyle='--', + label=f"Best: {results['best_score']:.3f}") + axes[0, 1].set_title('Score Distribution') + axes[0, 1].set_xlabel(f'{self.config.objective_metric.title()}') + axes[0, 1].set_ylabel('Frequency') + axes[0, 1].legend() + axes[0, 1].grid(True, alpha=0.3) + + # Parameter correlation (if applicable) + if len(all_results) > 10: + # Get parameter names + param_names = list(results['best_parameters'].keys()) + if len(param_names) >= 2: + param1, param2 = param_names[0], param_names[1] + + param1_values = [r['parameters'][param1] for r in all_results] + param2_values = [r['parameters'][param2] for r in all_results] + + scatter = axes[1, 
0].scatter(param1_values, param2_values, + c=scores, cmap='viridis', alpha=0.7) + axes[1, 0].set_xlabel(param1) + axes[1, 0].set_ylabel(param2) + axes[1, 0].set_title(f'Parameter Space ({param1} vs {param2})') + plt.colorbar(scatter, ax=axes[1, 0], label=self.config.objective_metric.title()) + + # Walk-forward results (if available) + if 'walk_forward' in results and 'detailed_results' in results['walk_forward']: + wf_results = results['walk_forward']['detailed_results'] + is_scores = [r['in_sample_score'] for r in wf_results] + oos_scores = [r['out_of_sample_score'] for r in wf_results] + + axes[1, 1].scatter(is_scores, oos_scores, alpha=0.7) + axes[1, 1].plot([min(is_scores), max(is_scores)], + [min(is_scores), max(is_scores)], 'r--', alpha=0.5) + axes[1, 1].set_xlabel('In-Sample Score') + axes[1, 1].set_ylabel('Out-of-Sample Score') + axes[1, 1].set_title('Walk-Forward Analysis') + axes[1, 1].grid(True, alpha=0.3) + + plt.tight_layout() + plt.show() + + +# Example usage +if __name__ == "__main__": + from agents.momentum_agent import MomentumAgent + from research.backtest_engine import EnhancedBacktester, BacktestConfig + + # Generate sample data + np.random.seed(42) + dates = pd.date_range('2020-01-01', '2023-12-31', freq='D') + prices = 100 * np.exp(np.cumsum(np.random.randn(len(dates)) * 0.01)) + + sample_data = pd.DataFrame({ + 'close': prices, + 'volume': np.random.randint(1000, 10000, len(dates)) + }, index=dates) + + # Define strategy function + def create_momentum_strategy(params): + return MomentumAgent(params) + + # Define backtest function + def run_backtest(data, signals): + config = BacktestConfig(initial_capital=100000) + backtester = EnhancedBacktester(data, config) + return backtester.backtest_strategy(signals) + + # Define parameter space + param_space = ParameterSpace() + param_space.add_parameter('fast_period', 'integer', min=5, max=20) + param_space.add_parameter('slow_period', 'integer', min=20, max=50) + 
param_space.add_parameter('momentum_threshold', 'continuous', min=0.01, max=0.05) + + # Add constraint: fast_period < slow_period + param_space.add_constraint(lambda p: p['fast_period'] < p['slow_period']) + + # Create optimizer + config = OptimizationConfig( + method='grid_search', + objective_metric='sharpe_ratio', + max_iterations=50 + ) + + optimizer = StrategyOptimizer(config) + + # Run optimization + print("Running strategy optimization...") + results = optimizer.optimize_strategy( + create_momentum_strategy, run_backtest, sample_data, param_space + ) + + print(f"Best parameters: {results['best_parameters']}") + print(f"Best score: {results['best_score']:.3f}") + + # Plot results + optimizer.plot_optimization_results(results) \ No newline at end of file diff --git a/utils/data-loader.py b/utils/data-loader.py new file mode 100644 index 0000000..1753a4b --- /dev/null +++ b/utils/data-loader.py @@ -0,0 +1,694 @@ +""" +Data Loading and Preprocessing System + +Comprehensive data management for trading algorithms: +- Multiple data source integration +- Data cleaning and validation +- Feature engineering +- Market data normalization +- Real-time and historical data handling +""" + +import pandas as pd +import numpy as np +import yfinance as yf +import requests +import sqlite3 +import json +from typing import Dict, List, Optional, Tuple, Union, Any +from dataclasses import dataclass, field +from datetime import datetime, timedelta +import warnings +import logging +from pathlib import Path +import asyncio +import aiohttp +from concurrent.futures import ThreadPoolExecutor +import pickle + +warnings.filterwarnings('ignore') + +# Setup logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@dataclass +class DataConfig: + """Configuration for data loading and preprocessing""" + # Data sources + primary_source: str = 'yfinance' # 'yfinance', 'alpha_vantage', 'twelvedata', 'quandl' + backup_sources: List[str] = 
field(default_factory=lambda: ['yfinance'])
    
    # Time settings
    start_date: str = '2020-01-01'
    end_date: str = 'today'
    frequency: str = 'daily'  # 'minute', 'hourly', 'daily', 'weekly', 'monthly'
    
    # Data validation
    min_data_points: int = 252  # Minimum required data points (~one trading year)
    max_missing_pct: float = 0.05  # Maximum allowed missing data percentage
    outlier_detection: bool = True
    outlier_method: str = 'iqr'  # 'iqr', 'zscore', 'isolation_forest'
    
    # Feature engineering
    add_technical_indicators: bool = True
    add_market_features: bool = True
    add_calendar_features: bool = True
    
    # Storage
    cache_data: bool = True
    cache_directory: str = './data_cache'
    database_path: str = './market_data.db'
    
    # API keys (set these as environment variables)
    alpha_vantage_key: Optional[str] = None
    twelvedata_key: Optional[str] = None
    quandl_key: Optional[str] = None


class DataValidator:
    """Data validation and cleaning utilities"""
    
    @staticmethod
    def validate_ohlcv_data(df: pd.DataFrame) -> Tuple[bool, List[str]]:
        """Validate OHLCV data integrity.

        Checks for required columns, non-positive prices, inconsistent
        OHLC relationships (e.g. high < low) and excessive missing data.

        Returns:
            (is_valid, issues): ``is_valid`` is True only when no issue was
            found; ``issues`` lists a human-readable message per problem.
        """
        issues = []
        
        required_columns = ['open', 'high', 'low', 'close']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            # Without the price columns no further check is meaningful.
            issues.append(f"Missing columns: {missing_columns}")
            return False, issues
        
        # Check for negative prices
        price_columns = ['open', 'high', 'low', 'close']
        for col in price_columns:
            if (df[col] <= 0).any():
                issues.append(f"Non-positive values found in {col}")
        
        # Check OHLC relationships: high must bound the bar from above,
        # low from below.
        if (df['high'] < df['low']).any():
            issues.append("High prices lower than low prices")
        
        if (df['high'] < df['open']).any() or (df['high'] < df['close']).any():
            issues.append("High prices lower than open/close prices")
        
        if (df['low'] > df['open']).any() or (df['low'] > df['close']).any():
            issues.append("Low prices higher than open/close prices")
        
        # Check for excessive missing data (per-column threshold of 10%)
        missing_pct = df.isnull().sum() / 
len(df) + excessive_missing = missing_pct[missing_pct > 0.1] + if not excessive_missing.empty: + issues.append(f"Excessive missing data: {excessive_missing.to_dict()}") + + return len(issues) == 0, issues + + @staticmethod + def detect_outliers(df: pd.DataFrame, method: str = 'iqr', + columns: List[str] = None) -> pd.DataFrame: + """Detect outliers in data""" + if columns is None: + columns = ['open', 'high', 'low', 'close'] + + outliers = pd.DataFrame(False, index=df.index, columns=columns) + + for col in columns: + if col not in df.columns: + continue + + if method == 'iqr': + Q1 = df[col].quantile(0.25) + Q3 = df[col].quantile(0.75) + IQR = Q3 - Q1 + lower_bound = Q1 - 1.5 * IQR + upper_bound = Q3 + 1.5 * IQR + outliers[col] = (df[col] < lower_bound) | (df[col] > upper_bound) + + elif method == 'zscore': + z_scores = np.abs((df[col] - df[col].mean()) / df[col].std()) + outliers[col] = z_scores > 3 + + elif method == 'isolation_forest': + try: + from sklearn.ensemble import IsolationForest + iso_forest = IsolationForest(contamination=0.1, random_state=42) + outliers[col] = iso_forest.fit_predict(df[[col]].fillna(df[col].mean())) == -1 + except ImportError: + logger.warning("scikit-learn not available, falling back to IQR method") + Q1 = df[col].quantile(0.25) + Q3 = df[col].quantile(0.75) + IQR = Q3 - Q1 + lower_bound = Q1 - 1.5 * IQR + upper_bound = Q3 + 1.5 * IQR + outliers[col] = (df[col] < lower_bound) | (df[col] > upper_bound) + + return outliers + + @staticmethod + def clean_data(df: pd.DataFrame, config: DataConfig) -> pd.DataFrame: + """Clean and preprocess data""" + df_clean = df.copy() + + # Handle missing values + # Forward fill first, then backward fill + df_clean = df_clean.fillna(method='ffill').fillna(method='bfill') + + # Detect and handle outliers + if config.outlier_detection: + outliers = DataValidator.detect_outliers(df_clean, config.outlier_method) + + # Replace outliers with interpolated values + for col in outliers.columns: + if col in 
df_clean.columns: + outlier_mask = outliers[col] + if outlier_mask.any(): + df_clean.loc[outlier_mask, col] = np.nan + df_clean[col] = df_clean[col].interpolate(method='linear') + + # Remove rows with excessive missing data + missing_pct = df_clean.isnull().sum(axis=1) / len(df_clean.columns) + df_clean = df_clean[missing_pct <= config.max_missing_pct] + + return df_clean + + +class FeatureEngineer: + """Feature engineering for trading data""" + + @staticmethod + def add_technical_indicators(df: pd.DataFrame) -> pd.DataFrame: + """Add common technical indicators""" + df_features = df.copy() + + # Price-based features + df_features['returns'] = df_features['close'].pct_change() + df_features['log_returns'] = np.log(df_features['close'] / df_features['close'].shift(1)) + + # Moving averages + for window in [5, 10, 20, 50, 200]: + df_features[f'sma_{window}'] = df_features['close'].rolling(window).mean() + df_features[f'ema_{window}'] = df_features['close'].ewm(span=window).mean() + + # Volatility measures + df_features['volatility_20'] = df_features['returns'].rolling(20).std() * np.sqrt(252) + df_features['atr_14'] = FeatureEngineer._calculate_atr(df_features, 14) + + # Momentum indicators + df_features['rsi_14'] = FeatureEngineer._calculate_rsi(df_features['close'], 14) + df_features['momentum_10'] = df_features['close'] / df_features['close'].shift(10) - 1 + + # MACD + macd_line, macd_signal, macd_histogram = FeatureEngineer._calculate_macd(df_features['close']) + df_features['macd'] = macd_line + df_features['macd_signal'] = macd_signal + df_features['macd_histogram'] = macd_histogram + + # Bollinger Bands + bb_upper, bb_middle, bb_lower = FeatureEngineer._calculate_bollinger_bands(df_features['close']) + df_features['bb_upper'] = bb_upper + df_features['bb_middle'] = bb_middle + df_features['bb_lower'] = bb_lower + df_features['bb_width'] = (bb_upper - bb_lower) / bb_middle + df_features['bb_position'] = (df_features['close'] - bb_lower) / (bb_upper - bb_lower) 
+ + return df_features + + @staticmethod + def add_market_features(df: pd.DataFrame) -> pd.DataFrame: + """Add market structure features""" + df_features = df.copy() + + # Price action features + df_features['body_size'] = abs(df_features['close'] - df_features['open']) + df_features['upper_shadow'] = df_features['high'] - np.maximum(df_features['open'], df_features['close']) + df_features['lower_shadow'] = np.minimum(df_features['open'], df_features['close']) - df_features['low'] + df_features['total_range'] = df_features['high'] - df_features['low'] + + # Volume features (if available) + if 'volume' in df_features.columns: + df_features['volume_sma_20'] = df_features['volume'].rolling(20).mean() + df_features['volume_ratio'] = df_features['volume'] / df_features['volume_sma_20'] + df_features['price_volume'] = df_features['close'] * df_features['volume'] + df_features['vwap'] = df_features['price_volume'].rolling(20).sum() / df_features['volume'].rolling(20).sum() + + # Gap analysis + df_features['gap'] = (df_features['open'] - df_features['close'].shift(1)) / df_features['close'].shift(1) + df_features['gap_filled'] = np.where( + df_features['gap'] > 0, + df_features['low'] <= df_features['close'].shift(1), + df_features['high'] >= df_features['close'].shift(1) + ) + + return df_features + + @staticmethod + def add_calendar_features(df: pd.DataFrame) -> pd.DataFrame: + """Add calendar-based features""" + df_features = df.copy() + + # Time-based features + df_features['year'] = df_features.index.year + df_features['month'] = df_features.index.month + df_features['day'] = df_features.index.day + df_features['day_of_week'] = df_features.index.dayofweek + df_features['day_of_year'] = df_features.index.dayofyear + df_features['week_of_year'] = df_features.index.isocalendar().week + + # Market session features + df_features['is_monday'] = (df_features['day_of_week'] == 0).astype(int) + df_features['is_friday'] = (df_features['day_of_week'] == 4).astype(int) + 
df_features['is_month_end'] = df_features.index.is_month_end.astype(int) + df_features['is_month_start'] = df_features.index.is_month_start.astype(int) + df_features['is_quarter_end'] = df_features.index.is_quarter_end.astype(int) + + # Seasonal patterns + df_features['month_sin'] = np.sin(2 * np.pi * df_features['month'] / 12) + df_features['month_cos'] = np.cos(2 * np.pi * df_features['month'] / 12) + df_features['day_sin'] = np.sin(2 * np.pi * df_features['day_of_week'] / 7) + df_features['day_cos'] = np.cos(2 * np.pi * df_features['day_of_week'] / 7) + + return df_features + + @staticmethod + def _calculate_rsi(prices: pd.Series, window: int = 14) -> pd.Series: + """Calculate RSI""" + delta = prices.diff() + gain = (delta.where(delta > 0, 0)).rolling(window=window).mean() + loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean() + rs = gain / loss + rsi = 100 - (100 / (1 + rs)) + return rsi + + @staticmethod + def _calculate_atr(df: pd.DataFrame, window: int = 14) -> pd.Series: + """Calculate Average True Range""" + high_low = df['high'] - df['low'] + high_close_prev = np.abs(df['high'] - df['close'].shift()) + low_close_prev = np.abs(df['low'] - df['close'].shift()) + + true_range = pd.concat([high_low, high_close_prev, low_close_prev], axis=1).max(axis=1) + atr = true_range.rolling(window=window).mean() + return atr + + @staticmethod + def _calculate_macd(prices: pd.Series, fast: int = 12, slow: int = 26, signal: int = 9) -> Tuple[pd.Series, pd.Series, pd.Series]: + """Calculate MACD""" + ema_fast = prices.ewm(span=fast).mean() + ema_slow = prices.ewm(span=slow).mean() + macd_line = ema_fast - ema_slow + macd_signal = macd_line.ewm(span=signal).mean() + macd_histogram = macd_line - macd_signal + return macd_line, macd_signal, macd_histogram + + @staticmethod + def _calculate_bollinger_bands(prices: pd.Series, window: int = 20, std_dev: float = 2) -> Tuple[pd.Series, pd.Series, pd.Series]: + """Calculate Bollinger Bands""" + middle = 
prices.rolling(window=window).mean() + std = prices.rolling(window=window).std() + upper = middle + (std * std_dev) + lower = middle - (std * std_dev) + return upper, middle, lower + + +class DataLoader: + """Main data loading and management class""" + + def __init__(self, config: DataConfig = None): + self.config = config or DataConfig() + self.cache_dir = Path(self.config.cache_directory) + self.cache_dir.mkdir(exist_ok=True) + self.validator = DataValidator() + self.feature_engineer = FeatureEngineer() + + # Initialize database + self._init_database() + + def _init_database(self): + """Initialize SQLite database for data storage""" + with sqlite3.connect(self.config.database_path) as conn: + conn.execute(''' + CREATE TABLE IF NOT EXISTS market_data ( + symbol TEXT, + date TEXT, + open REAL, + high REAL, + low REAL, + close REAL, + volume INTEGER, + source TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (symbol, date) + ) + ''') + + conn.execute(''' + CREATE TABLE IF NOT EXISTS data_metadata ( + symbol TEXT PRIMARY KEY, + last_updated TIMESTAMP, + source TEXT, + data_points INTEGER, + start_date TEXT, + end_date TEXT + ) + ''') + + def get_data(self, symbols: Union[str, List[str]], + start_date: str = None, end_date: str = None, + source: str = None) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]: + """ + Get market data for one or multiple symbols + + Args: + symbols: Single symbol or list of symbols + start_date: Start date (YYYY-MM-DD format) + end_date: End date (YYYY-MM-DD format) + source: Data source to use + + Returns: + DataFrame for single symbol, dict of DataFrames for multiple symbols + """ + if isinstance(symbols, str): + return self._get_single_symbol_data(symbols, start_date, end_date, source) + else: + return self._get_multiple_symbols_data(symbols, start_date, end_date, source) + + def _get_single_symbol_data(self, symbol: str, start_date: str = None, + end_date: str = None, source: str = None) -> pd.DataFrame: + """Get 
data for a single symbol""" + start_date = start_date or self.config.start_date + end_date = end_date or self.config.end_date + source = source or self.config.primary_source + + if end_date == 'today': + end_date = datetime.now().strftime('%Y-%m-%d') + + # Check cache first + if self.config.cache_data: + cached_data = self._get_cached_data(symbol, start_date, end_date) + if cached_data is not None and len(cached_data) >= self.config.min_data_points: + logger.info(f"Using cached data for {symbol}") + return self._process_data(cached_data, symbol) + + # Fetch new data + logger.info(f"Fetching data for {symbol} from {source}") + raw_data = self._fetch_data(symbol, start_date, end_date, source) + + if raw_data is None or raw_data.empty: + logger.error(f"Failed to fetch data for {symbol}") + return pd.DataFrame() + + # Validate and clean data + is_valid, issues = self.validator.validate_ohlcv_data(raw_data) + if not is_valid: + logger.warning(f"Data validation issues for {symbol}: {issues}") + + cleaned_data = self.validator.clean_data(raw_data, self.config) + + # Cache the data + if self.config.cache_data: + self._cache_data(symbol, cleaned_data, source) + + # Process and return + return self._process_data(cleaned_data, symbol) + + def _get_multiple_symbols_data(self, symbols: List[str], start_date: str = None, + end_date: str = None, source: str = None) -> Dict[str, pd.DataFrame]: + """Get data for multiple symbols""" + results = {} + + # Use ThreadPoolExecutor for parallel data fetching + with ThreadPoolExecutor(max_workers=5) as executor: + future_to_symbol = { + executor.submit(self._get_single_symbol_data, symbol, start_date, end_date, source): symbol + for symbol in symbols + } + + for future in future_to_symbol: + symbol = future_to_symbol[future] + try: + data = future.result() + if not data.empty: + results[symbol] = data + else: + logger.warning(f"No data retrieved for {symbol}") + except Exception as exc: + logger.error(f"Error fetching data for {symbol}: 
{exc}") + + return results + + def _fetch_data(self, symbol: str, start_date: str, end_date: str, source: str) -> pd.DataFrame: + """Fetch data from specified source""" + try: + if source == 'yfinance': + return self._fetch_yfinance_data(symbol, start_date, end_date) + elif source == 'alpha_vantage': + return self._fetch_alpha_vantage_data(symbol, start_date, end_date) + elif source == 'twelvedata': + return self._fetch_twelvedata_data(symbol, start_date, end_date) + else: + logger.error(f"Unsupported data source: {source}") + return pd.DataFrame() + except Exception as e: + logger.error(f"Error fetching data from {source}: {e}") + + # Try backup sources + for backup_source in self.config.backup_sources: + if backup_source != source: + logger.info(f"Trying backup source: {backup_source}") + try: + return self._fetch_data(symbol, start_date, end_date, backup_source) + except Exception as backup_e: + logger.error(f"Backup source {backup_source} also failed: {backup_e}") + + return pd.DataFrame() + + def _fetch_yfinance_data(self, symbol: str, start_date: str, end_date: str) -> pd.DataFrame: + """Fetch data from Yahoo Finance""" + try: + ticker = yf.Ticker(symbol) + data = ticker.history(start=start_date, end=end_date) + + # Standardize column names + data.columns = [col.lower() for col in data.columns] + data.index.name = 'date' + + return data + except Exception as e: + logger.error(f"Error fetching from yfinance: {e}") + return pd.DataFrame() + + def _fetch_alpha_vantage_data(self, symbol: str, start_date: str, end_date: str) -> pd.DataFrame: + """Fetch data from Alpha Vantage""" + if not self.config.alpha_vantage_key: + logger.error("Alpha Vantage API key not provided") + return pd.DataFrame() + + try: + url = f"https://www.alphavantage.co/query" + params = { + 'function': 'TIME_SERIES_DAILY', + 'symbol': symbol, + 'apikey': self.config.alpha_vantage_key, + 'outputsize': 'full' + } + + response = requests.get(url, params=params) + data = response.json() + + if 
'Time Series (Daily)' not in data: + logger.error(f"No data returned from Alpha Vantage for {symbol}") + return pd.DataFrame() + + # Convert to DataFrame + time_series = data['Time Series (Daily)'] + df = pd.DataFrame.from_dict(time_series, orient='index') + df.index = pd.to_datetime(df.index) + df.columns = ['open', 'high', 'low', 'close', 'volume'] + df = df.astype(float) + df.index.name = 'date' + + # Filter by date range + df = df[(df.index >= start_date) & (df.index <= end_date)] + + return df + except Exception as e: + logger.error(f"Error fetching from Alpha Vantage: {e}") + return pd.DataFrame() + + def _fetch_twelvedata_data(self, symbol: str, start_date: str, end_date: str) -> pd.DataFrame: + """Fetch data from Twelve Data""" + if not self.config.twelvedata_key: + logger.error("Twelve Data API key not provided") + return pd.DataFrame() + + try: + url = "https://api.twelvedata.com/time_series" + params = { + 'symbol': symbol, + 'interval': '1day', + 'start_date': start_date, + 'end_date': end_date, + 'apikey': self.config.twelvedata_key + } + + response = requests.get(url, params=params) + data = response.json() + + if 'values' not in data: + logger.error(f"No data returned from Twelve Data for {symbol}") + return pd.DataFrame() + + # Convert to DataFrame + df = pd.DataFrame(data['values']) + df['datetime'] = pd.to_datetime(df['datetime']) + df.set_index('datetime', inplace=True) + df.columns = ['open', 'high', 'low', 'close', 'volume'] + df = df.astype(float) + df.index.name = 'date' + df.sort_index(inplace=True) + + return df + except Exception as e: + logger.error(f"Error fetching from Twelve Data: {e}") + return pd.DataFrame() + + def _get_cached_data(self, symbol: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]: + """Get cached data from database""" + try: + with sqlite3.connect(self.config.database_path) as conn: + query = ''' + SELECT date, open, high, low, close, volume + FROM market_data + WHERE symbol = ? AND date BETWEEN ? AND ? 
+ ORDER BY date + ''' + df = pd.read_sql_query(query, conn, params=(symbol, start_date, end_date)) + + if df.empty: + return None + + df['date'] = pd.to_datetime(df['date']) + df.set_index('date', inplace=True) + return df + except Exception as e: + logger.error(f"Error reading cached data: {e}") + return None + + def _cache_data(self, symbol: str, data: pd.DataFrame, source: str): + """Cache data to database""" + try: + with sqlite3.connect(self.config.database_path) as conn: + # Prepare data for insertion + data_to_insert = data.copy() + data_to_insert['symbol'] = symbol + data_to_insert['source'] = source + data_to_insert.reset_index(inplace=True) + data_to_insert['date'] = data_to_insert['date'].dt.strftime('%Y-%m-%d') + + # Insert data ('replace' would drop the whole table, wiping other symbols' cache and the PK) + data_to_insert.to_sql('market_data', conn, if_exists='append', index=False) + + # Update metadata + metadata = { + 'symbol': symbol, + 'last_updated': datetime.now().isoformat(), + 'source': source, + 'data_points': len(data), + 'start_date': data.index.min().strftime('%Y-%m-%d'), + 'end_date': data.index.max().strftime('%Y-%m-%d') + } + + conn.execute(''' + INSERT OR REPLACE INTO data_metadata + (symbol, last_updated, source, data_points, start_date, end_date) + VALUES (?, ?, ?, ?, ?, ?) 
+ ''', tuple(metadata.values())) + + except Exception as e: + logger.error(f"Error caching data: {e}") + + def _process_data(self, data: pd.DataFrame, symbol: str) -> pd.DataFrame: + """Process raw data with feature engineering""" + processed_data = data.copy() + + # Add technical indicators + if self.config.add_technical_indicators: + processed_data = self.feature_engineer.add_technical_indicators(processed_data) + + # Add market features + if self.config.add_market_features: + processed_data = self.feature_engineer.add_market_features(processed_data) + + # Add calendar features + if self.config.add_calendar_features: + processed_data = self.feature_engineer.add_calendar_features(processed_data) + + return processed_data + + def get_data_info(self, symbol: str = None) -> pd.DataFrame: + """Get information about cached data""" + try: + with sqlite3.connect(self.config.database_path) as conn: + if symbol: + query = "SELECT * FROM data_metadata WHERE symbol = ?" + params = (symbol,) + else: + query = "SELECT * FROM data_metadata" + params = () + + df = pd.read_sql_query(query, conn, params=params) + return df + except Exception as e: + logger.error(f"Error getting data info: {e}") + return pd.DataFrame() + + def clear_cache(self, symbol: str = None): + """Clear cached data""" + try: + with sqlite3.connect(self.config.database_path) as conn: + if symbol: + conn.execute("DELETE FROM market_data WHERE symbol = ?", (symbol,)) + conn.execute("DELETE FROM data_metadata WHERE symbol = ?", (symbol,)) + else: + conn.execute("DELETE FROM market_data") + conn.execute("DELETE FROM data_metadata") + + logger.info(f"Cache cleared for {'all symbols' if not symbol else symbol}") + except Exception as e: + logger.error(f"Error clearing cache: {e}") + + +# Example usage +if __name__ == "__main__": + # Create data loader with configuration + config = DataConfig( + start_date='2020-01-01', + end_date='2023-12-31', + add_technical_indicators=True, + add_market_features=True, + 
cache_data=True + ) + + loader = DataLoader(config) + + # Load single symbol + print("Loading data for AAPL...") + aapl_data = loader.get_data('AAPL') + print(f"AAPL data shape: {aapl_data.shape}") + print(f"AAPL columns: {list(aapl_data.columns)}") + print(f"AAPL date range: {aapl_data.index.min()} to {aapl_data.index.max()}") + + # Load multiple symbols + print("\nLoading data for multiple symbols...") + symbols = ['AAPL', 'GOOGL', 'MSFT', 'TSLA'] + multi_data = loader.get_data(symbols) + + for symbol, data in multi_data.items(): + print(f"{symbol}: {data.shape[0]} rows, {data.shape[1]} columns") + + # Show data info + print("\nCached data info:") + info = loader.get_data_info() + print(info) \ No newline at end of file diff --git a/utils/risk-analytics.py b/utils/risk-analytics.py new file mode 100644 index 0000000..1c9b82f --- /dev/null +++ b/utils/risk-analytics.py @@ -0,0 +1,673 @@ +""" +Comprehensive Risk Analytics and Performance Metrics + +Advanced risk measurement and performance attribution: +- Value at Risk (VaR) and Conditional VaR +- Risk-adjusted returns +- Factor analysis and attribution +- Stress testing and scenario analysis +- Risk budgeting and allocation +- Tail risk measures +""" + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +from typing import Dict, List, Optional, Tuple, Union, Any +from dataclasses import dataclass +from scipy import stats +from scipy.optimize import minimize +from sklearn.decomposition import PCA +from sklearn.linear_model import LinearRegression +import warnings +from datetime import datetime, timedelta +import yfinance as yf + +warnings.filterwarnings('ignore') + + +@dataclass +class RiskConfig: + """Configuration for risk analytics""" + # VaR parameters + var_confidence_levels: List[float] = None + var_methods: List[str] = None # 'historical', 'parametric', 'monte_carlo' + + # Stress testing + stress_scenarios: Dict[str, float] = None + monte_carlo_simulations: int = 
10000 + + # Factor analysis + benchmark_symbols: List[str] = None + factor_lookback: int = 252 + + # Risk budgeting + risk_budget_method: str = 'component_var' # 'component_var', 'marginal_var' + + def __post_init__(self): + if self.var_confidence_levels is None: + self.var_confidence_levels = [0.01, 0.05, 0.10] + + if self.var_methods is None: + self.var_methods = ['historical', 'parametric'] + + if self.stress_scenarios is None: + self.stress_scenarios = { + 'market_crash': -0.20, + 'moderate_decline': -0.10, + 'volatility_spike': 0.50, + 'interest_rate_shock': 0.02 + } + + if self.benchmark_symbols is None: + self.benchmark_symbols = ['^GSPC', '^IXIC', '^RUT'] # S&P 500, NASDAQ, Russell 2000 + + +class VaRCalculator: + """Value at Risk calculations using different methods""" + + @staticmethod + def historical_var(returns: pd.Series, confidence_level: float = 0.05) -> float: + """Calculate historical VaR""" + return returns.quantile(confidence_level) + + @staticmethod + def parametric_var(returns: pd.Series, confidence_level: float = 0.05) -> float: + """Calculate parametric VaR assuming normal distribution""" + mu = returns.mean() + sigma = returns.std() + z_score = stats.norm.ppf(confidence_level) + return mu + z_score * sigma + + @staticmethod + def monte_carlo_var(returns: pd.Series, confidence_level: float = 0.05, + n_simulations: int = 10000) -> float: + """Calculate Monte Carlo VaR""" + mu = returns.mean() + sigma = returns.std() + + # Generate random scenarios + random_returns = np.random.normal(mu, sigma, n_simulations) + + return np.percentile(random_returns, confidence_level * 100) + + @staticmethod + def conditional_var(returns: pd.Series, confidence_level: float = 0.05, + method: str = 'historical') -> float: + """Calculate Conditional VaR (Expected Shortfall)""" + if method == 'historical': + var_threshold = VaRCalculator.historical_var(returns, confidence_level) + elif method == 'parametric': + var_threshold = VaRCalculator.parametric_var(returns, 
 confidence_level) + else: + var_threshold = VaRCalculator.monte_carlo_var(returns, confidence_level) + + # Calculate expected value of returns below VaR threshold + tail_returns = returns[returns <= var_threshold] + + if len(tail_returns) == 0: + return var_threshold + + return tail_returns.mean() + + +class RiskMetrics: + """Comprehensive risk metrics calculation""" + + def __init__(self, config: RiskConfig = None): + self.config = config or RiskConfig() + self.var_calculator = VaRCalculator() + + def calculate_basic_metrics(self, returns: pd.Series, + benchmark_returns: pd.Series = None) -> Dict[str, float]: + """Calculate basic risk and performance metrics""" + metrics = {} + + # Return metrics + metrics['total_return'] = (1 + returns).prod() - 1 + metrics['annualized_return'] = (1 + returns.mean()) ** 252 - 1 + metrics['volatility'] = returns.std() * np.sqrt(252) + + # Risk-adjusted returns + risk_free_rate = 0.03 # 3% annual risk-free rate + excess_returns = returns - risk_free_rate / 252 + metrics['sharpe_ratio'] = excess_returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0 + + # Downside risk + negative_returns = returns[returns < 0] + if len(negative_returns) > 0: + downside_deviation = negative_returns.std() * np.sqrt(252) + metrics['sortino_ratio'] = excess_returns.mean() * 252 / downside_deviation # downside_deviation is already annualized + else: + metrics['sortino_ratio'] = np.inf + + # Drawdown metrics + cumulative_returns = (1 + returns).cumprod() + running_max = cumulative_returns.expanding().max() + drawdown = (cumulative_returns - running_max) / running_max + + metrics['max_drawdown'] = drawdown.min() + metrics['current_drawdown'] = drawdown.iloc[-1] + + # Calmar ratio + if metrics['max_drawdown'] != 0: + metrics['calmar_ratio'] = metrics['annualized_return'] / abs(metrics['max_drawdown']) + else: + metrics['calmar_ratio'] = np.inf + + # Skewness and Kurtosis + metrics['skewness'] = returns.skew() + metrics['kurtosis'] = returns.kurtosis() + + # Win rate + 
winning_periods = (returns > 0).sum() + total_periods = len(returns) + metrics['win_rate'] = winning_periods / total_periods if total_periods > 0 else 0 + + # Average win/loss + winning_returns = returns[returns > 0] + losing_returns = returns[returns < 0] + + if len(winning_returns) > 0: + metrics['avg_win'] = winning_returns.mean() + else: + metrics['avg_win'] = 0 + + if len(losing_returns) > 0: + metrics['avg_loss'] = losing_returns.mean() + metrics['win_loss_ratio'] = abs(metrics['avg_win'] / metrics['avg_loss']) if metrics['avg_loss'] != 0 else np.inf + else: + metrics['avg_loss'] = 0 + metrics['win_loss_ratio'] = np.inf + + # Benchmark comparison (if provided) + if benchmark_returns is not None and len(benchmark_returns) == len(returns): + # Beta + covariance = returns.cov(benchmark_returns) + benchmark_variance = benchmark_returns.var() + metrics['beta'] = covariance / benchmark_variance if benchmark_variance != 0 else 0 + + # Alpha + benchmark_return = benchmark_returns.mean() * 252 + metrics['alpha'] = metrics['annualized_return'] - (risk_free_rate + metrics['beta'] * (benchmark_return - risk_free_rate)) + + # Information ratio + excess_returns_vs_benchmark = returns - benchmark_returns + tracking_error = excess_returns_vs_benchmark.std() * np.sqrt(252) + metrics['information_ratio'] = excess_returns_vs_benchmark.mean() / tracking_error * np.sqrt(252) if tracking_error > 0 else 0 + + # Correlation + metrics['correlation'] = returns.corr(benchmark_returns) + + return metrics + + def calculate_var_metrics(self, returns: pd.Series) -> Dict[str, Dict[str, float]]: + """Calculate VaR and CVaR for different confidence levels and methods""" + var_metrics = {} + + for confidence_level in self.config.var_confidence_levels: + var_metrics[f'{int(confidence_level * 100)}%'] = {} + + for method in self.config.var_methods: + if method == 'historical': + var_value = self.var_calculator.historical_var(returns, confidence_level) + elif method == 'parametric': + var_value = 
self.var_calculator.parametric_var(returns, confidence_level) + elif method == 'monte_carlo': + var_value = self.var_calculator.monte_carlo_var(returns, confidence_level, + self.config.monte_carlo_simulations) + else: + continue + + cvar_value = self.var_calculator.conditional_var(returns, confidence_level, method) + + var_metrics[f'{int(confidence_level * 100)}%'][f'var_{method}'] = var_value + var_metrics[f'{int(confidence_level * 100)}%'][f'cvar_{method}'] = cvar_value + + return var_metrics + + def calculate_tail_risk_metrics(self, returns: pd.Series) -> Dict[str, float]: + """Calculate tail risk metrics""" + metrics = {} + + # Expected Shortfall at different levels + for confidence_level in [0.01, 0.05, 0.10]: + var_threshold = self.var_calculator.historical_var(returns, confidence_level) + tail_returns = returns[returns <= var_threshold] + + if len(tail_returns) > 0: + metrics[f'expected_shortfall_{int(confidence_level * 100)}%'] = tail_returns.mean() + else: + metrics[f'expected_shortfall_{int(confidence_level * 100)}%'] = var_threshold + + # Tail ratio + right_tail = returns.quantile(0.95) + left_tail = returns.quantile(0.05) + metrics['tail_ratio'] = abs(right_tail / left_tail) if left_tail != 0 else np.inf + + # Maximum consecutive losses + consecutive_losses = 0 + max_consecutive_losses = 0 + + for ret in returns: + if ret < 0: + consecutive_losses += 1 + max_consecutive_losses = max(max_consecutive_losses, consecutive_losses) + else: + consecutive_losses = 0 + + metrics['max_consecutive_losses'] = max_consecutive_losses + + return metrics + + def stress_test(self, portfolio_value: float, returns: pd.Series, + positions: Dict[str, float] = None) -> Dict[str, float]: + """Perform stress testing under various scenarios""" + stress_results = {} + + for scenario_name, shock_magnitude in self.config.stress_scenarios.items(): + if scenario_name == 'market_crash' or scenario_name == 'moderate_decline': + # Apply negative shock to returns + stressed_return = 
shock_magnitude + stressed_portfolio_value = portfolio_value * (1 + stressed_return) + stress_results[scenario_name] = { + 'portfolio_value': stressed_portfolio_value, + 'loss': portfolio_value - stressed_portfolio_value, + 'loss_percentage': stressed_return + } + + elif scenario_name == 'volatility_spike': + # Calculate impact of volatility increase + current_vol = returns.std() * np.sqrt(252) + stressed_vol = current_vol * (1 + shock_magnitude) + + # Estimate VaR under stressed volatility + stressed_var = returns.mean() + stats.norm.ppf(0.05) * (stressed_vol / np.sqrt(252)) + stressed_portfolio_value = portfolio_value * (1 + stressed_var) + + stress_results[scenario_name] = { + 'portfolio_value': stressed_portfolio_value, + 'loss': portfolio_value - stressed_portfolio_value, + 'loss_percentage': stressed_var, + 'stressed_volatility': stressed_vol + } + + return stress_results + + def calculate_risk_contribution(self, returns_matrix: pd.DataFrame, + weights: np.ndarray) -> Dict[str, Any]: + """Calculate risk contribution of each component""" + # Calculate portfolio return + portfolio_returns = (returns_matrix * weights).sum(axis=1) + portfolio_var = portfolio_returns.var() + + # Calculate marginal VaR + marginal_var = {} + component_var = {} + + for i, asset in enumerate(returns_matrix.columns): + # Marginal VaR: derivative of portfolio variance with respect to weight + marginal_var[asset] = 2 * weights[i] * returns_matrix[asset].cov(portfolio_returns) / portfolio_var if portfolio_var > 0 else 0 + + # Component VaR: weight * marginal VaR + component_var[asset] = weights[i] * marginal_var[asset] + + # Risk contribution as percentage + total_component_var = sum(component_var.values()) + risk_contribution_pct = { + asset: component_var[asset] / total_component_var * 100 if total_component_var != 0 else 0 + for asset in component_var + } + + return { + 'marginal_var': marginal_var, + 'component_var': component_var, + 'risk_contribution_pct': risk_contribution_pct, + 
'portfolio_var': portfolio_var + } + + +class FactorAnalysis: + """Factor analysis and performance attribution""" + + def __init__(self, config: RiskConfig = None): + self.config = config or RiskConfig() + self.factor_data = None + self.factor_loadings = None + + def load_factor_data(self, start_date: str = None, end_date: str = None): + """Load factor data (market benchmarks)""" + if start_date is None: + start_date = (datetime.now() - timedelta(days=self.config.factor_lookback * 2)).strftime('%Y-%m-%d') + if end_date is None: + end_date = datetime.now().strftime('%Y-%m-%d') + + factor_data = {} + + for symbol in self.config.benchmark_symbols: + try: + ticker = yf.Ticker(symbol) + data = ticker.history(start=start_date, end=end_date) + if not data.empty: + returns = data['Close'].pct_change().dropna() + factor_data[symbol] = returns + except Exception as e: + print(f"Error loading factor data for {symbol}: {e}") + + if factor_data: + self.factor_data = pd.DataFrame(factor_data) + self.factor_data = self.factor_data.dropna() + + def perform_factor_analysis(self, returns: pd.Series) -> Dict[str, Any]: + """Perform factor analysis using regression""" + if self.factor_data is None: + self.load_factor_data() + + if self.factor_data is None or self.factor_data.empty: + return {'error': 'No factor data available'} + + # Align dates + common_dates = returns.index.intersection(self.factor_data.index) + if len(common_dates) < 60: # Minimum data requirement + return {'error': 'Insufficient overlapping data'} + + returns_aligned = returns[common_dates] + factors_aligned = self.factor_data.loc[common_dates] + + # Multiple regression + X = factors_aligned.values + y = returns_aligned.values + + # Add constant for alpha + X_with_const = np.column_stack([np.ones(len(X)), X]) + + # Fit regression + reg = LinearRegression(fit_intercept=False) + reg.fit(X_with_const, y) + + # Extract results + alpha = reg.coef_[0] * 252 # Annualized alpha + factor_loadings = 
dict(zip(self.factor_data.columns, reg.coef_[1:])) + + # Calculate R-squared + y_pred = reg.predict(X_with_const) + ss_res = np.sum((y - y_pred) ** 2) + ss_tot = np.sum((y - np.mean(y)) ** 2) + r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0 + + # Factor contribution to return + factor_contributions = {} + for factor, loading in factor_loadings.items(): + factor_return = factors_aligned[factor].mean() * 252 + factor_contributions[factor] = loading * factor_return + + # Residual risk + residuals = y - y_pred + idiosyncratic_risk = np.std(residuals) * np.sqrt(252) + + return { + 'alpha': alpha, + 'factor_loadings': factor_loadings, + 'factor_contributions': factor_contributions, + 'r_squared': r_squared, + 'idiosyncratic_risk': idiosyncratic_risk, + 'total_systematic_risk': np.sqrt(np.var(y_pred)) * np.sqrt(252) + } + + def perform_pca_analysis(self, returns_matrix: pd.DataFrame) -> Dict[str, Any]: + """Perform Principal Component Analysis""" + # Standardize returns + returns_std = (returns_matrix - returns_matrix.mean()) / returns_matrix.std() + returns_std = returns_std.dropna() + + # Perform PCA + pca = PCA() + pca.fit(returns_std) + + # Extract results + explained_variance_ratio = pca.explained_variance_ratio_ + cumulative_variance = np.cumsum(explained_variance_ratio) + + # Principal components + components = pd.DataFrame( + pca.components_[:5], # First 5 components + columns=returns_matrix.columns, + index=[f'PC{i+1}' for i in range(5)] + ) + + # Transform data + transformed_data = pca.transform(returns_std) + + return { + 'explained_variance_ratio': explained_variance_ratio, + 'cumulative_variance': cumulative_variance, + 'components': components, + 'n_components_90_variance': np.argmax(cumulative_variance >= 0.9) + 1, + 'transformed_data': transformed_data + } + + +class RiskAnalyzer: + """Main risk analysis class""" + + def __init__(self, config: RiskConfig = None): + self.config = config or RiskConfig() + self.risk_metrics = RiskMetrics(config) + 
        self.factor_analysis = FactorAnalysis(config)

    def comprehensive_risk_analysis(self, returns: pd.Series,
                                   benchmark_returns: pd.Series = None,
                                   portfolio_value: float = 100000) -> Dict[str, Any]:
        """Perform comprehensive risk analysis.

        Runs every analyzer this class aggregates and collects the outputs
        into one dict keyed by analysis name.

        Args:
            returns: Daily return series of the strategy/portfolio.
            benchmark_returns: Optional benchmark series; forwarded only to
                the basic-metrics calculation.
            portfolio_value: Notional value handed to the stress test.

        Returns:
            Dict with keys 'basic_metrics', 'var_metrics', 'tail_risk',
            'stress_test' and 'factor_analysis'. The exact shape of each
            value is defined by RiskMetrics / FactorAnalysis (declared
            elsewhere in this module).
        """
        analysis_results = {}

        # Basic metrics (Sharpe, drawdown, etc.)
        analysis_results['basic_metrics'] = self.risk_metrics.calculate_basic_metrics(
            returns, benchmark_returns
        )

        # VaR metrics
        analysis_results['var_metrics'] = self.risk_metrics.calculate_var_metrics(returns)

        # Tail risk metrics
        analysis_results['tail_risk'] = self.risk_metrics.calculate_tail_risk_metrics(returns)

        # Stress testing (scenario losses on the given notional)
        analysis_results['stress_test'] = self.risk_metrics.stress_test(
            portfolio_value, returns
        )

        # Factor analysis
        analysis_results['factor_analysis'] = self.factor_analysis.perform_factor_analysis(returns)

        return analysis_results

    def plot_risk_analysis(self, results: Dict[str, Any], returns: pd.Series,
                          figsize: Tuple[int, int] = (16, 12)):
        """Plot comprehensive risk analysis.

        Renders a 3x3 matplotlib grid (distribution, cumulative returns,
        drawdown, rolling vol, VaR comparison, Q-Q plot, factor loadings,
        stress losses, text summary), calls plt.show(), then prints a text
        report. *results* must come from comprehensive_risk_analysis().
        """
        fig, axes = plt.subplots(3, 3, figsize=figsize)
        axes = axes.flatten()

        # 1. Return distribution (daily returns, in percent)
        axes[0].hist(returns * 100, bins=50, alpha=0.7, edgecolor='black')
        axes[0].axvline(returns.mean() * 100, color='red', linestyle='--',
                       label=f'Mean: {returns.mean()*100:.2f}%')
        axes[0].set_title('Return Distribution')
        axes[0].set_xlabel('Daily Return (%)')
        axes[0].set_ylabel('Frequency')
        axes[0].legend()
        axes[0].grid(True, alpha=0.3)

        # 2. Cumulative returns (growth of one unit)
        cumulative_returns = (1 + returns).cumprod()
        axes[1].plot(cumulative_returns.index, cumulative_returns, linewidth=2)
        axes[1].set_title('Cumulative Returns')
        axes[1].set_ylabel('Cumulative Return')
        axes[1].grid(True, alpha=0.3)

        # 3. Drawdown from the running peak, in percent
        running_max = cumulative_returns.expanding().max()
        drawdown = (cumulative_returns - running_max) / running_max * 100
        axes[2].fill_between(drawdown.index, drawdown, 0, color='red', alpha=0.3)
        axes[2].plot(drawdown.index, drawdown, color='red', linewidth=1)
        axes[2].set_title(f'Drawdown (Max: {results["basic_metrics"]["max_drawdown"]:.2%})')
        axes[2].set_ylabel('Drawdown (%)')
        axes[2].grid(True, alpha=0.3)

        # 4. Rolling volatility (30-day window, annualized with sqrt(252))
        rolling_vol = returns.rolling(30).std() * np.sqrt(252) * 100
        axes[3].plot(rolling_vol.index, rolling_vol, linewidth=1)
        axes[3].set_title('30-Day Rolling Volatility')
        axes[3].set_ylabel('Volatility (%)')
        axes[3].grid(True, alpha=0.3)

        # 5. VaR comparison: historical vs parametric per confidence level
        if 'var_metrics' in results:
            var_data = results['var_metrics']
            confidence_levels = list(var_data.keys())
            historical_vars = [var_data[level]['var_historical'] * 100 for level in confidence_levels]
            parametric_vars = [var_data[level]['var_parametric'] * 100 for level in confidence_levels]

            x = np.arange(len(confidence_levels))
            width = 0.35  # side-by-side bars

            axes[4].bar(x - width/2, historical_vars, width, label='Historical VaR', alpha=0.7)
            axes[4].bar(x + width/2, parametric_vars, width, label='Parametric VaR', alpha=0.7)
            axes[4].set_title('VaR Comparison')
            axes[4].set_xlabel('Confidence Level')
            axes[4].set_ylabel('VaR (%)')
            axes[4].set_xticks(x)
            axes[4].set_xticklabels(confidence_levels)
            axes[4].legend()
            axes[4].grid(True, alpha=0.3)

        # 6. Q-Q plot against the normal distribution (normality check)
        from scipy.stats import probplot
        probplot(returns, dist="norm", plot=axes[5])
        axes[5].set_title('Q-Q Plot (Normal Distribution)')
        axes[5].grid(True, alpha=0.3)

        # 7. Factor loadings (if available)
        if 'factor_analysis' in results and 'factor_loadings' in results['factor_analysis']:
            factor_loadings = results['factor_analysis']['factor_loadings']
            factors = list(factor_loadings.keys())
            loadings = list(factor_loadings.values())

            axes[6].bar(factors, loadings, alpha=0.7)
            axes[6].set_title('Factor Loadings')
            axes[6].set_ylabel('Loading')
            axes[6].tick_params(axis='x', rotation=45)
            axes[6].grid(True, alpha=0.3)

        # 8. Stress test results (loss per scenario, in percent)
        if 'stress_test' in results:
            stress_data = results['stress_test']
            scenarios = list(stress_data.keys())
            losses = [stress_data[scenario]['loss_percentage'] * 100 for scenario in scenarios]

            axes[7].bar(scenarios, losses, alpha=0.7, color='red')
            axes[7].set_title('Stress Test Results')
            axes[7].set_ylabel('Loss (%)')
            axes[7].tick_params(axis='x', rotation=45)
            axes[7].grid(True, alpha=0.3)

        # 9. Risk metrics summary (text panel; "\n" is fine in matplotlib text)
        basic_metrics = results['basic_metrics']
        summary_text = f"""
        Sharpe Ratio: {basic_metrics['sharpe_ratio']:.2f}
        Sortino Ratio: {basic_metrics['sortino_ratio']:.2f}
        Max Drawdown: {basic_metrics['max_drawdown']:.2%}
        Volatility: {basic_metrics['volatility']:.2%}
        Skewness: {basic_metrics['skewness']:.2f}
        Kurtosis: {basic_metrics['kurtosis']:.2f}
        Win Rate: {basic_metrics['win_rate']:.2%}
        """

        axes[8].text(0.1, 0.9, summary_text, transform=axes[8].transAxes,
                    fontsize=10, verticalalignment='top',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
        axes[8].set_title('Risk Metrics Summary')
        axes[8].axis('off')

        plt.tight_layout()
        plt.show()

        # Print detailed results to stdout as well
        self._print_risk_summary(results)

    def _print_risk_summary(self, results: Dict[str, Any]):
        """Print formatted risk analysis summary to stdout."""
        print("=" * 80)
        print("COMPREHENSIVE RISK ANALYSIS REPORT")
        print("=" * 80)

        # Basic performance metrics
        basic = results['basic_metrics']
        print("\nPERFORMANCE METRICS:")
        print("-" * 40)
        print(f"Total Return: {basic['total_return']:.2%}")
        print(f"Annualized Return: {basic['annualized_return']:.2%}")
        print(f"Volatility: {basic['volatility']:.2%}")
        print(f"Sharpe Ratio: {basic['sharpe_ratio']:.2f}")
        print(f"Sortino Ratio: {basic['sortino_ratio']:.2f}")
        print(f"Calmar Ratio: {basic['calmar_ratio']:.2f}")

        # Risk metrics
        print("\nRISK METRICS:")
        print("-" * 40)
        print(f"Maximum Drawdown: {basic['max_drawdown']:.2%}")
        print(f"Current Drawdown: {basic['current_drawdown']:.2%}")
        print(f"Skewness: {basic['skewness']:.2f}")
        print(f"Kurtosis: {basic['kurtosis']:.2f}")
        print(f"Win Rate: {basic['win_rate']:.2%}")
        print(f"Win/Loss Ratio: {basic['win_loss_ratio']:.2f}")

        # VaR metrics (one historical VaR/CVaR pair per confidence level)
        if 'var_metrics' in results:
            print("\nVALUE AT RISK:")
            print("-" * 40)
            for level, metrics in results['var_metrics'].items():
                print(f"{level} VaR (Historical): {metrics['var_historical']:.2%}")
                print(f"{level} CVaR (Historical): {metrics['cvar_historical']:.2%}")

        # Factor analysis (only when the regression produced an alpha)
        if 'factor_analysis' in results and 'alpha' in results['factor_analysis']:
            fa = results['factor_analysis']
            print("\nFACTOR ANALYSIS:")
            print("-" * 40)
            print(f"Alpha (Annualized): {fa['alpha']:.2%}")
            print(f"R-squared: {fa['r_squared']:.2%}")
            print(f"Idiosyncratic Risk: {fa['idiosyncratic_risk']:.2%}")

            print("\nFactor Loadings:")
            for factor, loading in fa['factor_loadings'].items():
                print(f"  {factor}: {loading:.3f}")

        print("=" * 80)


# Example usage
if __name__ == "__main__":
    # Generate sample return data
    np.random.seed(42)
    dates = pd.date_range('2020-01-01', '2023-12-31', freq='D')

    # Create returns with some realistic characteristics
    base_returns = np.random.randn(len(dates)) * 0.01
    volatility_clustering = np.random.randn(len(dates)) * 0.005
    trend = np.linspace(0, 0.0002, len(dates))  # Slight upward trend

    returns = base_returns + volatility_clustering + trend
    returns = pd.Series(returns, index=dates)
    # Create risk analyzer and run the end-to-end demo
    config = RiskConfig()
    analyzer = RiskAnalyzer(config)

    # Perform comprehensive analysis on a $1M notional
    print("Running comprehensive risk analysis...")
    results = analyzer.comprehensive_risk_analysis(returns, portfolio_value=1000000)

    # Plot results (also prints the text report)
    analyzer.plot_risk_analysis(results, returns)


# ============================================================================
# NOTE(review): the reviewed artifact is a git patch; a NEW FILE begins here:
#   utils/visualization.py
# ============================================================================
"""
Comprehensive Visualization Tools for Trading Analysis

Advanced plotting and visualization utilities:
- Interactive charts with Plotly
- Performance dashboards
- Strategy comparison plots
- Risk visualization
- Portfolio analytics
- Technical analysis charts
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
from typing import Dict, List, Optional, Tuple, Union, Any
from datetime import datetime, timedelta
import warnings

# NOTE(review): blanket suppression hides real warnings (e.g. pandas
# deprecations); consider scoping it to specific categories.
warnings.filterwarnings('ignore')

# Set global matplotlib/seaborn style at import time
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")


class TradingVisualizer:
    """Comprehensive trading visualization toolkit"""

    def __init__(self, style: str = 'plotly_white'):
        # Plotly template name applied to every figure built by this class
        self.style = style
        # Shared color palette (Bootstrap-like role names)
        self.colors = {
            'primary': '#1f77b4',
            'secondary': '#ff7f0e',
            'success': '#2ca02c',
            'danger': '#d62728',
            'warning': '#ff7f0e',
            'info': '#17a2b8',
            'light': '#f8f9fa',
            'dark': '#343a40'
        }

    def plot_price_and_signals(self, data: pd.DataFrame, signals: pd.Series = None,
                              title: str = "Price Chart with Trading Signals",
                              figsize: Tuple[int, int] = (15, 8)) -> go.Figure:
        """Plot price chart with trading signals.

        Args:
            data: OHLCV frame; requires 'close', optionally 'open', 'volume',
                'sma_20', 'sma_50'.
            signals: Optional series aligned with *data*; +1 marks buys,
                -1 marks sells.
            title: Figure title.
            figsize: NOTE(review): accepted but never used (Plotly sizing is
                set via update_layout below).

        Returns:
            A two-row Plotly figure (price+signals over volume).
        """
        fig = make_subplots(
            rows=2, cols=1,
            shared_xaxes=True,
            vertical_spacing=0.03,
            subplot_titles=('Price and Signals', 'Volume'),
            row_heights=[0.7, 0.3]
        )

        # Price chart
        fig.add_trace(
            go.Scatter(
                x=data.index,
                y=data['close'],
                mode='lines',
                name='Price',
                line=dict(color=self.colors['primary'], width=2)
            ),
            row=1, col=1
        )

        # Add moving averages if available
        if 'sma_20' in data.columns:
            fig.add_trace(
                go.Scatter(
                    x=data.index,
                    y=data['sma_20'],
                    mode='lines',
                    name='SMA 20',
                    line=dict(color=self.colors['secondary'], width=1)
                ),
                row=1, col=1
            )

        if 'sma_50' in data.columns:
            fig.add_trace(
                go.Scatter(
                    x=data.index,
                    y=data['sma_50'],
                    mode='lines',
                    name='SMA 50',
                    line=dict(color=self.colors['warning'], width=1)
                ),
                row=1, col=1
            )

        # Add trading signals as triangle markers on the price line
        if signals is not None:
            buy_signals = data[signals == 1]
            sell_signals = data[signals == -1]

            if not buy_signals.empty:
                fig.add_trace(
                    go.Scatter(
                        x=buy_signals.index,
                        y=buy_signals['close'],
                        mode='markers',
                        name='Buy Signal',
                        marker=dict(
                            symbol='triangle-up',
                            size=12,
                            color=self.colors['success']
                        )
                    ),
                    row=1, col=1
                )

            if not sell_signals.empty:
                fig.add_trace(
                    go.Scatter(
                        x=sell_signals.index,
                        y=sell_signals['close'],
                        mode='markers',
                        name='Sell Signal',
                        marker=dict(
                            symbol='triangle-down',
                            size=12,
                            color=self.colors['danger']
                        )
                    ),
                    row=1, col=1
                )

        # Volume chart: red bars on down days, green on up days
        if 'volume' in data.columns:
            # NOTE(review): the loop variable shadows the builtin `open`
            colors = ['red' if close < open else 'green'
                     for close, open in zip(data['close'], data['open'])]

            fig.add_trace(
                go.Bar(
                    x=data.index,
                    y=data['volume'],
                    name='Volume',
                    marker_color=colors,
                    opacity=0.7
                ),
                row=2, col=1
            )

        fig.update_layout(
            title=title,
            template=self.style,
            height=600,
            showlegend=True,
            xaxis_rangeslider_visible=False
        )

        return fig

    def plot_candlestick_chart(self, data: pd.DataFrame,
                              title: str = "Candlestick Chart",
                              indicators: List[str] = None) -> go.Figure:
        """Plot candlestick chart with technical indicators.

        Args:
            data: Frame with 'open'/'high'/'low'/'close'; optionally 'volume',
                Bollinger columns ('bb_upper'/'bb_lower'/'bb_middle') and
                'rsi_14'.
            title: Figure title.
            indicators: NOTE(review): accepted but never used — indicator
                traces are driven by column presence instead.

        Raises:
            ValueError: If any OHLC column is missing.
        """
        if not all(col in data.columns for col in ['open', 'high', 'low', 'close']):
            raise ValueError("Data must contain OHLC columns")

        fig = make_subplots(
            rows=3, cols=1,
            shared_xaxes=True,
            vertical_spacing=0.02,
            subplot_titles=('Price', 'Volume', 'Indicators'),
            row_heights=[0.6, 0.2, 0.2]
        )

        # Candlestick chart
        fig.add_trace(
            go.Candlestick(
                x=data.index,
                open=data['open'],
                high=data['high'],
                low=data['low'],
                close=data['close'],
                name='Price',
                increasing_line_color=self.colors['success'],
                decreasing_line_color=self.colors['danger']
            ),
            row=1, col=1
        )

        # Bollinger Bands: upper drawn first so lower can fill to it
        if all(col in data.columns for col in ['bb_upper', 'bb_lower', 'bb_middle']):
            fig.add_trace(
                go.Scatter(
                    x=data.index,
                    y=data['bb_upper'],
                    mode='lines',
                    name='BB Upper',
                    line=dict(color='rgba(128,128,128,0.5)', width=1),
                    showlegend=False
                ),
                row=1, col=1
            )

            fig.add_trace(
                go.Scatter(
                    x=data.index,
                    y=data['bb_lower'],
                    mode='lines',
                    name='BB Lower',
                    line=dict(color='rgba(128,128,128,0.5)', width=1),
                    fill='tonexty',
                    fillcolor='rgba(128,128,128,0.1)',
                    showlegend=False
                ),
                row=1, col=1
            )

            fig.add_trace(
                go.Scatter(
                    x=data.index,
                    y=data['bb_middle'],
                    mode='lines',
                    name='BB Middle',
                    line=dict(color='rgba(128,128,128,0.7)', width=1)
                ),
                row=1, col=1
            )

        # Volume: red bars on down days, green on up days
        if 'volume' in data.columns:
            # NOTE(review): the loop variable shadows the builtin `open`
            colors = ['red' if close < open else 'green'
                     for close, open in zip(data['close'], data['open'])]

            fig.add_trace(
                go.Bar(
                    x=data.index,
                    y=data['volume'],
                    name='Volume',
                    marker_color=colors,
                    opacity=0.7
                ),
                row=2, col=1
            )

        # Technical indicators panel (RSI with 30/50/70 guide lines)
        if 'rsi_14' in data.columns:
            fig.add_trace(
                go.Scatter(
                    x=data.index,
                    y=data['rsi_14'],
                    mode='lines',
                    name='RSI',
                    line=dict(color=self.colors['info'], width=2)
                ),
                row=3, col=1
            )

            # RSI overbought/oversold/neutral levels
            fig.add_hline(y=70, line_dash="dash", line_color="red", row=3, col=1)
            fig.add_hline(y=30, line_dash="dash", line_color="green", row=3, col=1)
            fig.add_hline(y=50, line_dash="dot", line_color="gray", row=3, col=1)

        fig.update_layout(
            title=title,
            template=self.style,
            height=800,
            xaxis_rangeslider_visible=False
        )

        return fig

    def plot_performance_dashboard(self, backtest_results: Dict,
                                  benchmark_data: pd.Series = None) -> go.Figure:
        """Create comprehensive performance dashboard.

        Expects *backtest_results* with 'results_df' (columns
        'portfolio_value', 'returns'), 'performance_metrics', and
        'initial_capital'; optionally 'trades' (list of dicts with 'pnl').
        """
        results_df = backtest_results['results_df']
        metrics = backtest_results['performance_metrics']

        fig = make_subplots(
            rows=3, cols=3,
            subplot_titles=[
                'Portfolio Value', 'Drawdown', 'Rolling Sharpe',
                'Returns Distribution', 'Monthly Returns', 'Risk Metrics',
                'Cumulative Returns', 'Volatility', 'Trade Analysis'
            ],
            specs=[
                [{"secondary_y": False}, {"secondary_y": False}, {"secondary_y": False}],
                [{"secondary_y": False}, {"secondary_y": False}, {"secondary_y": False}],
                [{"secondary_y": False}, {"secondary_y": False}, {"secondary_y": False}]
            ],
            vertical_spacing=0.08,
            horizontal_spacing=0.08
        )

        # 1. Portfolio Value (vs optional benchmark scaled to initial capital)
        fig.add_trace(
            go.Scatter(
                x=results_df.index,
                y=results_df['portfolio_value'],
                mode='lines',
                name='Portfolio',
                line=dict(color=self.colors['primary'], width=2)
            ),
            row=1, col=1
        )

        if benchmark_data is not None:
            benchmark_cumulative = (1 + benchmark_data).cumprod() * backtest_results['initial_capital']
            fig.add_trace(
                go.Scatter(
                    x=benchmark_cumulative.index,
                    y=benchmark_cumulative,
                    mode='lines',
                    name='Benchmark',
                    line=dict(color=self.colors['secondary'], width=2)
                ),
                row=1, col=1
            )

        # 2. Drawdown from the running peak, in percent
        rolling_max = results_df['portfolio_value'].expanding().max()
        drawdown = (results_df['portfolio_value'] - rolling_max) / rolling_max * 100

        fig.add_trace(
            go.Scatter(
                x=drawdown.index,
                y=drawdown,
                mode='lines',
                fill='tonexty',
                name='Drawdown',
                line=dict(color=self.colors['danger'], width=1),
                fillcolor='rgba(214, 39, 40, 0.3)'
            ),
            row=1, col=2
        )
Rolling Sharpe + rolling_returns = results_df['returns'] + rolling_sharpe = rolling_returns.rolling(60).mean() / rolling_returns.rolling(60).std() * np.sqrt(252) + + fig.add_trace( + go.Scatter( + x=rolling_sharpe.index, + y=rolling_sharpe, + mode='lines', + name='Rolling Sharpe', + line=dict(color=self.colors['info'], width=2) + ), + row=1, col=3 + ) + + # 4. Returns Distribution + fig.add_trace( + go.Histogram( + x=results_df['returns'] * 100, + nbinsx=50, + name='Returns', + marker_color=self.colors['primary'], + opacity=0.7 + ), + row=2, col=1 + ) + + # 5. Monthly Returns Heatmap + monthly_returns = results_df['returns'].resample('M').apply(lambda x: (1 + x).prod() - 1) + monthly_returns_pivot = monthly_returns.to_frame('returns') + monthly_returns_pivot['year'] = monthly_returns_pivot.index.year + monthly_returns_pivot['month'] = monthly_returns_pivot.index.month + + pivot_table = monthly_returns_pivot.pivot_table( + values='returns', index='year', columns='month', fill_value=0 + ) * 100 + + fig.add_trace( + go.Heatmap( + z=pivot_table.values, + x=pivot_table.columns, + y=pivot_table.index, + colorscale='RdYlGn', + name='Monthly Returns', + showscale=False + ), + row=2, col=2 + ) + + # 6. Risk Metrics (Text) + risk_text = f""" + Sharpe: {metrics['sharpe_ratio']:.2f}
+ Sortino: {metrics['sortino_ratio']:.2f}
+ Max DD: {metrics['max_drawdown']:.2%}
+ Volatility: {metrics['volatility']:.2%}
+ VaR 95%: {metrics.get('var_95', 0):.2%}
+ Calmar: {metrics['calmar_ratio']:.2f} + """ + + fig.add_annotation( + text=risk_text, + xref="x domain", yref="y domain", + x=0.5, y=0.5, + showarrow=False, + font=dict(size=12), + row=2, col=3 + ) + + # 7. Cumulative Returns + cumulative_returns = (1 + results_df['returns']).cumprod() + fig.add_trace( + go.Scatter( + x=cumulative_returns.index, + y=cumulative_returns, + mode='lines', + name='Cumulative Returns', + line=dict(color=self.colors['success'], width=2) + ), + row=3, col=1 + ) + + # 8. Rolling Volatility + rolling_vol = results_df['returns'].rolling(30).std() * np.sqrt(252) * 100 + fig.add_trace( + go.Scatter( + x=rolling_vol.index, + y=rolling_vol, + mode='lines', + name='30D Volatility', + line=dict(color=self.colors['warning'], width=2) + ), + row=3, col=2 + ) + + # 9. Win/Loss Analysis + if 'trades' in backtest_results: + trades = backtest_results['trades'] + if trades: + trade_pnls = [trade.get('pnl', 0) for trade in trades] + wins = [pnl for pnl in trade_pnls if pnl > 0] + losses = [pnl for pnl in trade_pnls if pnl < 0] + + fig.add_trace( + go.Bar( + x=['Wins', 'Losses'], + y=[len(wins), len(losses)], + name='Trade Count', + marker_color=[self.colors['success'], self.colors['danger']] + ), + row=3, col=3 + ) + + fig.update_layout( + title="Performance Dashboard", + template=self.style, + height=1200, + showlegend=False + ) + + return fig + + def plot_strategy_comparison(self, strategies_results: Dict[str, Dict], + title: str = "Strategy Comparison") -> go.Figure: + """Compare multiple strategies""" + fig = make_subplots( + rows=2, cols=2, + subplot_titles=[ + 'Portfolio Values', 'Risk-Return Scatter', + 'Drawdown Comparison', 'Performance Metrics' + ] + ) + + # 1. Portfolio Values + for name, results in strategies_results.items(): + results_df = results['results_df'] + fig.add_trace( + go.Scatter( + x=results_df.index, + y=results_df['portfolio_value'], + mode='lines', + name=name, + line=dict(width=2) + ), + row=1, col=1 + ) + + # 2. 
Risk-Return Scatter + returns = [] + volatilities = [] + names = [] + + for name, results in strategies_results.items(): + metrics = results['performance_metrics'] + returns.append(metrics['annualized_return'] * 100) + volatilities.append(metrics['volatility'] * 100) + names.append(name) + + fig.add_trace( + go.Scatter( + x=volatilities, + y=returns, + mode='markers+text', + text=names, + textposition="top center", + name='Strategies', + marker=dict(size=10) + ), + row=1, col=2 + ) + + # 3. Drawdown Comparison + for name, results in strategies_results.items(): + results_df = results['results_df'] + rolling_max = results_df['portfolio_value'].expanding().max() + drawdown = (results_df['portfolio_value'] - rolling_max) / rolling_max * 100 + + fig.add_trace( + go.Scatter( + x=drawdown.index, + y=drawdown, + mode='lines', + name=f'{name} DD', + line=dict(width=1) + ), + row=2, col=1 + ) + + # 4. Performance Metrics Table + metrics_data = [] + for name, results in strategies_results.items(): + metrics = results['performance_metrics'] + metrics_data.append([ + name, + f"{metrics['total_return']:.2%}", + f"{metrics['sharpe_ratio']:.2f}", + f"{metrics['max_drawdown']:.2%}", + f"{metrics['volatility']:.2%}" + ]) + + fig.add_trace( + go.Table( + header=dict( + values=['Strategy', 'Total Return', 'Sharpe', 'Max DD', 'Volatility'], + fill_color='paleturquoise', + align='left' + ), + cells=dict( + values=list(zip(*metrics_data)), + fill_color='lavender', + align='left' + ) + ), + row=2, col=2 + ) + + fig.update_layout( + title=title, + template=self.style, + height=800 + ) + + return fig + + def plot_correlation_analysis(self, returns_matrix: pd.DataFrame, + title: str = "Correlation Analysis") -> go.Figure: + """Plot correlation analysis""" + fig = make_subplots( + rows=2, cols=2, + subplot_titles=[ + 'Correlation Heatmap', 'Rolling Correlations', + 'PCA Analysis', 'Diversification Benefits' + ] + ) + + # 1. 
Correlation Heatmap + correlation_matrix = returns_matrix.corr() + + fig.add_trace( + go.Heatmap( + z=correlation_matrix.values, + x=correlation_matrix.columns, + y=correlation_matrix.index, + colorscale='RdBu', + zmid=0, + name='Correlation' + ), + row=1, col=1 + ) + + # 2. Rolling Correlations (first two assets) + if len(returns_matrix.columns) >= 2: + asset1, asset2 = returns_matrix.columns[0], returns_matrix.columns[1] + rolling_corr = returns_matrix[asset1].rolling(60).corr(returns_matrix[asset2]) + + fig.add_trace( + go.Scatter( + x=rolling_corr.index, + y=rolling_corr, + mode='lines', + name=f'{asset1} vs {asset2}', + line=dict(width=2) + ), + row=1, col=2 + ) + + # 3. PCA Analysis + from sklearn.decomposition import PCA + pca = PCA() + pca.fit(returns_matrix.dropna()) + + explained_variance = pca.explained_variance_ratio_[:10] # First 10 components + cumulative_variance = np.cumsum(explained_variance) + + fig.add_trace( + go.Bar( + x=list(range(1, len(explained_variance) + 1)), + y=explained_variance * 100, + name='Individual', + marker_color=self.colors['primary'] + ), + row=2, col=1 + ) + + fig.add_trace( + go.Scatter( + x=list(range(1, len(cumulative_variance) + 1)), + y=cumulative_variance * 100, + mode='lines+markers', + name='Cumulative', + line=dict(color=self.colors['danger'], width=2), + yaxis='y2' + ), + row=2, col=1 + ) + + # 4. 
Diversification Benefits + equal_weight_portfolio = returns_matrix.mean(axis=1) + individual_vol = returns_matrix.std() * np.sqrt(252) * 100 + portfolio_vol = equal_weight_portfolio.std() * np.sqrt(252) * 100 + + diversification_ratio = individual_vol.mean() / portfolio_vol + + fig.add_trace( + go.Bar( + x=['Individual Assets (Avg)', 'Equal Weight Portfolio'], + y=[individual_vol.mean(), portfolio_vol], + name='Volatility', + marker_color=[self.colors['warning'], self.colors['success']] + ), + row=2, col=2 + ) + + fig.update_layout( + title=title, + template=self.style, + height=800 + ) + + return fig + + def plot_factor_analysis(self, factor_results: Dict, + title: str = "Factor Analysis") -> go.Figure: + """Plot factor analysis results""" + if 'factor_loadings' not in factor_results: + raise ValueError("Factor analysis results required") + + fig = make_subplots( + rows=2, cols=2, + subplot_titles=[ + 'Factor Loadings', 'Factor Contributions', + 'Risk Attribution', 'Factor Performance' + ] + ) + + # 1. Factor Loadings + factors = list(factor_results['factor_loadings'].keys()) + loadings = list(factor_results['factor_loadings'].values()) + + fig.add_trace( + go.Bar( + x=factors, + y=loadings, + name='Factor Loadings', + marker_color=self.colors['primary'] + ), + row=1, col=1 + ) + + # 2. Factor Contributions + if 'factor_contributions' in factor_results: + contributions = list(factor_results['factor_contributions'].values()) + + fig.add_trace( + go.Bar( + x=factors, + y=contributions, + name='Contributions', + marker_color=self.colors['success'] + ), + row=1, col=2 + ) + + # 3. Risk Attribution + systematic_risk = factor_results.get('total_systematic_risk', 0) + idiosyncratic_risk = factor_results.get('idiosyncratic_risk', 0) + + fig.add_trace( + go.Pie( + labels=['Systematic Risk', 'Idiosyncratic Risk'], + values=[systematic_risk, idiosyncratic_risk], + name='Risk Attribution' + ), + row=2, col=1 + ) + + # 4. 
R-squared and Alpha + r_squared = factor_results.get('r_squared', 0) + alpha = factor_results.get('alpha', 0) + + fig.add_annotation( + text=f"R-squared: {r_squared:.2%}
Alpha: {alpha:.2%}", + xref="x domain", yref="y domain", + x=0.5, y=0.5, + showarrow=False, + font=dict(size=14), + row=2, col=2 + ) + + fig.update_layout( + title=title, + template=self.style, + height=600 + ) + + return fig + + def create_interactive_dashboard(self, backtest_results: Dict, + strategy_name: str = "Strategy") -> go.Figure: + """Create comprehensive interactive dashboard""" + results_df = backtest_results['results_df'] + metrics = backtest_results['performance_metrics'] + + # Create main dashboard with multiple tabs + fig = go.Figure() + + # Add portfolio value trace + fig.add_trace( + go.Scatter( + x=results_df.index, + y=results_df['portfolio_value'], + mode='lines', + name='Portfolio Value', + line=dict(color=self.colors['primary'], width=3), + hovertemplate='Date: %{x}
' + + 'Portfolio Value: $%{y:,.2f}
' + + '' + ) + ) + + # Add benchmark line + initial_value = backtest_results.get('initial_capital', 100000) + fig.add_hline( + y=initial_value, + line_dash="dash", + line_color="red", + annotation_text="Initial Capital" + ) + + # Update layout with comprehensive styling + fig.update_layout( + title=dict( + text=f"{strategy_name} - Interactive Performance Dashboard", + x=0.5, + font=dict(size=20) + ), + template=self.style, + height=600, + hovermode='x unified', + showlegend=True, + legend=dict( + yanchor="top", + y=0.99, + xanchor="left", + x=0.01 + ), + annotations=[ + dict( + text=f"Total Return: {metrics['total_return']:.2%} | " + + f"Sharpe: {metrics['sharpe_ratio']:.2f} | " + + f"Max DD: {metrics['max_drawdown']:.2%}", + showarrow=False, + xref="paper", yref="paper", + x=0.5, y=1.02, + xanchor='center', + font=dict(size=12, color="gray") + ) + ] + ) + + # Add range selector + fig.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, label="1M", step="month", stepmode="backward"), + dict(count=3, label="3M", step="month", stepmode="backward"), + dict(count=6, label="6M", step="month", stepmode="backward"), + dict(count=1, label="1Y", step="year", stepmode="backward"), + dict(step="all") + ]) + ), + rangeslider=dict(visible=True), + type="date" + ) + ) + + return fig + + +# Utility functions for quick plotting +def quick_performance_plot(returns: pd.Series, title: str = "Performance Analysis"): + """Quick performance plot for returns series""" + visualizer = TradingVisualizer() + + # Create simple performance data + cumulative_returns = (1 + returns).cumprod() + + fig = go.Figure() + + fig.add_trace( + go.Scatter( + x=cumulative_returns.index, + y=cumulative_returns, + mode='lines', + name='Cumulative Returns', + line=dict(width=2) + ) + ) + + fig.update_layout( + title=title, + template='plotly_white', + height=400 + ) + + return fig + + +def quick_drawdown_plot(portfolio_values: pd.Series, title: str = "Drawdown Analysis"): + 
"""Quick drawdown plot""" + rolling_max = portfolio_values.expanding().max() + drawdown = (portfolio_values - rolling_max) / rolling_max * 100 + + fig = go.Figure() + + fig.add_trace( + go.Scatter( + x=drawdown.index, + y=drawdown, + mode='lines', + fill='tonexty', + name='Drawdown', + line=dict(color='red', width=1), + fillcolor='rgba(255, 0, 0, 0.3)' + ) + ) + + fig.update_layout( + title=title, + template='plotly_white', + height=300, + yaxis_title="Drawdown (%)" + ) + + return fig + + +# Example usage +if __name__ == "__main__": + # Generate sample data for demonstration + np.random.seed(42) + dates = pd.date_range('2020-01-01', '2023-12-31', freq='D') + + # Create sample OHLCV data + base_price = 100 + returns = np.random.randn(len(dates)) * 0.02 + prices = base_price * np.exp(np.cumsum(returns)) + + # Generate OHLC from close prices + high_prices = prices * (1 + np.abs(np.random.randn(len(dates)) * 0.01)) + low_prices = prices * (1 - np.abs(np.random.randn(len(dates)) * 0.01)) + open_prices = np.roll(prices, 1) + open_prices[0] = base_price + + sample_data = pd.DataFrame({ + 'open': open_prices, + 'high': high_prices, + 'low': low_prices, + 'close': prices, + 'volume': np.random.randint(1000, 10000, len(dates)), + 'returns': np.concatenate([[0], np.diff(np.log(prices))]) + }, index=dates) + + # Add some technical indicators + sample_data['sma_20'] = sample_data['close'].rolling(20).mean() + sample_data['sma_50'] = sample_data['close'].rolling(50).mean() + sample_data['rsi_14'] = 50 + 30 * np.sin(np.arange(len(dates)) * 0.1) # Fake RSI + + # Create sample signals + signals = pd.Series(0, index=dates) + signals[sample_data['close'] > sample_data['sma_20']] = 1 + signals[sample_data['close'] < sample_data['sma_20']] = -1 + + # Create visualizer + visualizer = TradingVisualizer() + + # Example plots + print("Creating visualization examples...") + + # 1. 
Price and signals chart + price_fig = visualizer.plot_price_and_signals(sample_data, signals) + price_fig.show() + + # 2. Candlestick chart + candlestick_fig = visualizer.plot_candlestick_chart(sample_data) + candlestick_fig.show() + + print("Visualization examples created successfully!") \ No newline at end of file