Statistics - Essential Statistical Analysis for Quantitative Finance
Overview
This utility provides comprehensive statistical analysis tools essential for quantitative finance, risk management, and investment analysis. Statistics forms the foundation for understanding financial data patterns, risk assessment, and predictive modeling.
Key Concepts
Descriptive Statistics
- Measures of Central Tendency: Mean, median, mode
- Measures of Dispersion: Variance, standard deviation, range
- Distribution Shape: Skewness, kurtosis
- Position Measures: Percentiles, quartiles, z-scores
Probability Distributions
- Normal Distribution: Stock returns, central limit theorem
- Log-Normal Distribution: Asset prices, positive values
- Student's t-Distribution: Small sample statistics
- Chi-Square Distribution: Variance testing, goodness of fit
Statistical Inference
- Hypothesis Testing: t-tests, ANOVA, chi-square tests
- Confidence Intervals: Parameter estimation
- Correlation Analysis: Linear relationships
- Regression Analysis: Predictive modeling
Financial Applications
- Risk Metrics: Value at Risk (VaR), Expected Shortfall
- Portfolio Analysis: Sharpe ratio, diversification benefits
- Performance Measurement: Alpha, beta, tracking error
- Market Analysis: Trend analysis, momentum strategies
Implementation
Descriptive Statistics
import numpy as np
from scipy import stats
def calculate_descriptive_stats(data: np.ndarray) -> dict:
"""Calculate comprehensive descriptive statistics."""
return {
'count': len(data),
'mean': np.mean(data),
'median': np.median(data),
'mode': stats.mode(data).mode[0] if len(data) > 0 else None,
'std': np.std(data),
'var': np.var(data),
'min': np.min(data),
'max': np.max(data),
'range': np.max(data) - np.min(data),
'q25': np.percentile(data, 25),
'q75': np.percentile(data, 75),
'iqr': np.percentile(data, 75) - np.percentile(data, 25),
'skewness': stats.skew(data),
'kurtosis': stats.kurtosis(data)
}
def calculate_financial_returns(prices: np.ndarray) -> dict:
"""Calculate financial return statistics."""
returns = np.diff(prices) / prices[:-1]
return {
'total_return': (prices[-1] / prices[0]) - 1,
'annualized_return': np.mean(returns) * 252, # Assuming daily returns
'annualized_volatility': np.std(returns) * np.sqrt(252),
'sharpe_ratio': np.mean(returns) / np.std(returns) * np.sqrt(252),
'max_drawdown': calculate_max_drawdown(prices),
'var_95': np.percentile(returns, 5), # 95% Value at Risk
'cvar_95': calculate_conditional_var(returns, 0.95)
}
Probability Distributions
def normal_distribution_analysis(data: np.ndarray) -> dict:
"""Analyze normal distribution properties."""
mu, sigma = stats.norm.fit(data)
return {
'mean': mu,
'std': sigma,
'shapiro_test': stats.shapiro(data), # Normality test
'ks_test': stats.kstest(data, 'norm'), # Kolmogorov-Smirnov test
'confidence_interval_95': stats.norm.interval(0.95, mu, sigma),
'percentile_5': stats.norm.ppf(0.05, mu, sigma),
'percentile_95': stats.norm.ppf(0.95, mu, sigma)
}
def lognormal_analysis(positive_data: np.ndarray) -> dict:
"""Analyze log-normal distribution for asset prices."""
log_data = np.log(positive_data)
mu, sigma = stats.norm.fit(log_data)
return {
'log_mean': mu,
'log_std': sigma,
'expected_value': np.exp(mu + sigma**2 / 2),
'median': np.exp(mu),
'mode': np.exp(mu - sigma**2),
'variance': (np.exp(sigma**2) - 1) * np.exp(2*mu + sigma**2)
}
Hypothesis Testing
def perform_hypothesis_tests(sample1: np.ndarray, sample2: np.ndarray) -> dict:
"""Perform comprehensive hypothesis tests."""
return {
't_test': stats.ttest_ind(sample1, sample2),
'mann_whitney': stats.mannwhitneyu(sample1, sample2, alternative='two-sided'),
'ks_test': stats.ks_2samp(sample1, sample2),
'levene_test': stats.levene(sample1, sample2), # Equal variances
'f_test': stats.f_oneway(sample1, sample2)
}
def correlation_analysis(returns_matrix: np.ndarray) -> dict:
"""Analyze correlations between assets."""
correlations = np.corrcoef(returns_matrix)
return {
'correlation_matrix': correlations,
'average_correlation': np.mean(correlations[np.triu_indices_from(correlations, k=1)]),
'max_correlation': np.max(correlations),
'min_correlation': np.min(correlations),
'diversification_ratio': len(returns_matrix) / (1 + np.sum(correlations)) if len(returns_matrix) > 1 else 1
}
Risk Metrics
def calculate_value_at_risk(returns: np.ndarray, confidence_level: float = 0.95) -> float:
"""Calculate Value at Risk using historical simulation."""
return -np.percentile(returns, (1 - confidence_level) * 100)
def calculate_expected_shortfall(returns: np.ndarray, confidence_level: float = 0.95) -> float:
"""Calculate Expected Shortfall (Conditional VaR)."""
var_threshold = calculate_value_at_risk(returns, confidence_level)
tail_losses = returns[returns <= -var_threshold]
return -np.mean(tail_losses)
def calculate_max_drawdown(price_series: np.ndarray) -> float:
"""Calculate maximum drawdown from price series."""
peak = price_series[0]
max_drawdown = 0
for price in price_series:
if price > peak:
peak = price
drawdown = (peak - price) / peak
max_drawdown = max(max_drawdown, drawdown)
return max_drawdown
Examples
Example 1: Portfolio Risk Analysis
def portfolio_risk_analysis():
"""Comprehensive portfolio risk analysis."""
print("=== Portfolio Risk Analysis ===")
# Simulate portfolio returns
np.random.seed(42)
n_assets = 5
n_periods = 1000
# Generate correlated returns
mean_returns = np.array([0.001, 0.0008, 0.0012, 0.0009, 0.0011])
cov_matrix = np.array([
[0.0004, 0.0002, 0.0001, 0.00015, 0.0001],
[0.0002, 0.0003, 0.00015, 0.0001, 0.00012],
[0.0001, 0.00015, 0.0005, 0.0002, 0.00018],
[0.00015, 0.0001, 0.0002, 0.0004, 0.00016],
[0.0001, 0.00012, 0.00018, 0.00016, 0.00035]
])
returns = np.random.multivariate_normal(mean_returns, cov_matrix, n_periods)
# Portfolio weights
weights = np.array([0.3, 0.25, 0.2, 0.15, 0.1])
# Calculate portfolio statistics
portfolio_returns = np.dot(returns, weights)
stats = calculate_descriptive_stats(portfolio_returns)
print(f"Portfolio Mean Return: {stats['mean']".6f"}")
print(f"Portfolio Volatility: {stats['std']".6f"}")
print(f"Sharpe Ratio: {stats['mean'] / stats['std']".4f"}")
# Risk metrics
var_95 = calculate_value_at_risk(portfolio_returns, 0.95)
cvar_95 = calculate_expected_shortfall(portfolio_returns, 0.95)
print(f"95% VaR: {var_95".6f"}")
print(f"95% CVaR: {cvar_95".6f"}")
# Correlation analysis
corr_analysis = correlation_analysis(returns)
print(f"Average Correlation: {corr_analysis['average_correlation']".4f"}")
print(f"Diversification Ratio: {corr_analysis['diversification_ratio']".4f"}")
print()
Example 2: Hypothesis Testing for Market Efficiency
def market_efficiency_tests():
"""Test market efficiency using statistical methods."""
print("=== Market Efficiency Tests ===")
# Simulate market returns
np.random.seed(42)
n_periods = 500
# Generate random walk returns (efficient market hypothesis)
efficient_returns = np.random.normal(0.0005, 0.02, n_periods)
# Generate momentum returns (inefficient market)
momentum_returns = []
momentum_returns.append(np.random.normal(0.0005, 0.02))
for i in range(1, n_periods):
# Add momentum component
momentum = 0.1 * (momentum_returns[i-1] - 0.0005)
momentum_returns.append(np.random.normal(0.0005, 0.02) + momentum)
momentum_returns = np.array(momentum_returns)
# Test for normality
efficient_normal = stats.shapiro(efficient_returns)
momentum_normal = stats.shapiro(momentum_returns)
print(f"Efficient Market Normality Test: p-value = {efficient_normal.pvalue".4f"}")
print(f"Momentum Market Normality Test: p-value = {momentum_normal.pvalue".4f"}")
# Test for autocorrelation
efficient_autocorr = stats.pearsonr(efficient_returns[:-1], efficient_returns[1:])
momentum_autocorr = stats.pearsonr(momentum_returns[:-1], momentum_returns[1:])
print(f"Efficient Market Autocorrelation: r = {efficient_autocorr[0]".4f"}")
print(f"Momentum Market Autocorrelation: r = {momentum_autocorr[0]".4f"}")
# Variance comparison
variance_test = stats.levene(efficient_returns, momentum_returns)
print(f"Variance Equality Test: p-value = {variance_test.pvalue".4f"}")
print()
Example 3: Monte Carlo Risk Simulation
def monte_carlo_risk_simulation():
"""Monte Carlo simulation for risk analysis."""
print("=== Monte Carlo Risk Simulation ===")
# Portfolio parameters
initial_value = 1000000
expected_return = 0.08
volatility = 0.15
time_horizon = 1 # 1 year
n_simulations = 10000
confidence_level = 0.95
# Run Monte Carlo simulation
np.random.seed(42)
simulated_values = []
for _ in range(n_simulations):
# Simulate annual return
annual_return = np.random.normal(expected_return, volatility)
final_value = initial_value * (1 + annual_return)
simulated_values.append(final_value)
simulated_values = np.array(simulated_values)
# Calculate risk metrics
final_mean = np.mean(simulated_values)
final_std = np.std(simulated_values)
var_95 = np.percentile(simulated_values, (1 - confidence_level) * 100)
cvar_95 = np.mean(simulated_values[simulated_values <= var_95])
max_drawdown = (initial_value - np.min(simulated_values)) / initial_value
print(f"Expected Final Value: ${final_mean",.0f"}")
print(f"Value at Risk (95%): ${var_95",.0f"}")
print(f"Conditional VaR (95%): ${cvar_95",.0f"}")
print(f"Maximum Drawdown: {max_drawdown".2%"}")
print(f"Probability of Loss: {np.mean(simulated_values < initial_value)".2%"}")
# Generate confidence intervals
ci_lower = np.percentile(simulated_values, 2.5)
ci_upper = np.percentile(simulated_values, 97.5)
print(f"95% Confidence Interval: ${ci_lower",.0f"} - ${ci_upper",.0f"}")
print()
Testing
Run the test suite to verify functionality:
References
- Statistics for Finance
- Quantitative Finance Stack Exchange
- SciPy Statistics Documentation
- Risk Management and Financial Institutions
Learning Path
Prerequisites
- Basic probability and statistics
- Understanding of financial markets
Next Steps
- Regression Analysis: Linear and nonlinear modeling
- Time Series Analysis: ARIMA, GARCH models
- Machine Learning: Advanced predictive modeling
Assessment
- Calculate Value at Risk for a portfolio using different methods
- Perform hypothesis testing to compare two investment strategies
- Build a Monte Carlo simulation for option pricing
- Analyze the correlation structure of a multi-asset portfolio
This utility provides essential statistical tools for quantitative finance. Master these techniques to understand risk, measure performance, and make data-driven investment decisions.