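"""Asset return data processing and analysis.

Covers dynamic beta simulation, t-distributed data generation, structural
breaks, rolling beta estimation, heteroskedasticity handling, a Fama-French
three-factor extension, statistical tests, and visualization.
"""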
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from scipy.stats import ttest_1samp
# ======================
# Parameter settings
# ======================
# Seed NumPy's global random generator so every run draws the same numbers.
np.random.seed(42)
# Number of monthly observations in each simulated series.
NUM_MONTHS = 60
# Scale factor multiplied into the Student-t market-return draws.
MARKET_VOLATILITY = 0.0433
# Intercept (alpha) added to every simulated asset return.
ALPHA_TRUE = 0.00
# ======================
# Data generation module
# ======================
def generate_returns(num_months=None, market_volatility=None, alpha=None,
                     break_month=36):
    """Simulate monthly market and asset returns under three beta scenarios.

    One market-return path is drawn and shared by all scenarios; each
    scenario then gets its own idiosyncratic noise, so
    ``asset = alpha + beta_t * market + epsilon``.

    Scenarios (dictionary keys of the result):
        * ``"time_varying"``     -- beta starts at 1.5 and rises by 0.1 every
          24 months.
        * ``"structural_break"`` -- beta is 1.2 for the first ``break_month``
          observations and 1.8 afterwards.
        * ``"constant"``         -- beta is 1.5 throughout (control group).

    Args:
        num_months: Number of monthly observations. Defaults to the
            module-level ``NUM_MONTHS``.
        market_volatility: Scale applied to the Student-t market draws.
            Defaults to the module-level ``MARKET_VOLATILITY``.
        alpha: Intercept added to every asset return. Defaults to the
            module-level ``ALPHA_TRUE``.
        break_month: Number of leading observations that use the pre-break
            beta in the ``"structural_break"`` scenario.

    Returns:
        dict mapping scenario name to a DataFrame with columns ``market``,
        ``asset`` and ``true_beta``, indexed by month-end dates starting in
        January 2014.

    Raises:
        ValueError: If ``num_months`` is smaller than 1.

    Note:
        Draws come from NumPy's global random state, in the order: market
        returns, then one noise vector per scenario in the order listed above.
    """
    # Resolve defaults lazily so a bare ``generate_returns()`` call still
    # follows the module-level settings.
    if num_months is None:
        num_months = NUM_MONTHS
    if market_volatility is None:
        market_volatility = MARKET_VOLATILITY
    if alpha is None:
        alpha = ALPHA_TRUE
    if num_months < 1:
        raise ValueError("num_months must be a positive integer")

    # Base market returns: Student-t draws (5 degrees of freedom) for fat
    # tails, multiplied by the volatility scale.
    market_returns = (
        np.random.standard_t(df=5, size=num_months) * market_volatility
    )

    # Build every beta path from the sample length instead of fixed-size
    # pieces, so the scenarios stay aligned for any ``num_months``.
    month_idx = np.arange(num_months)
    beta_scenarios = {
        "time_varying": 1.5 + 0.1 * (month_idx // 24),
        "structural_break": np.where(month_idx < break_month, 1.2, 1.8),
        "constant": np.full(num_months, 1.5),
    }

    # Month-end index shared by all scenarios. An offset object is used
    # instead of the 'M' alias, which newer pandas releases deprecate.
    dates = pd.date_range(
        start='2014-01-01', periods=num_months, freq=pd.offsets.MonthEnd()
    )

    dfs = {}
    for scenario, beta_values in beta_scenarios.items():
        # Independent idiosyncratic noise for each scenario.
        epsilon = np.random.normal(loc=0, scale=0.05, size=num_months)
        asset_returns = alpha + beta_values * market_returns + epsilon
        dfs[scenario] = pd.DataFrame(
            {
                'market': market_returns,
                'asset': asset_returns,
                'true_beta': beta_values,
            },
            index=dates,
        )
    return dfs
# ======================
# Analysis module
# ======================
def analyze_data(df, scenario_name, window=24):
    """Estimate, plot and report beta for one simulated scenario.

    Steps: compute a rolling beta, fit OLS / WLS / simulated three-factor
    regressions of ``asset`` on ``market``, draw a 2x2 diagnostic figure, and
    print the regression summaries plus a t-test of the rolling betas against
    1.5.

    Args:
        df: DataFrame with columns ``market``, ``asset`` and ``true_beta``.
            It is modified in place: columns ``rolling_beta``, ``SMB`` and
            ``HML`` are added.
        scenario_name: Label used in titles and the printed header. If it
            contains ``'structural_break'`` the break point is marked on the
            beta panel.
        window: Rolling-window length, in rows, for the rolling beta.

    Returns:
        None. Output is the displayed figure and the printed report.
    """
    # Rolling beta = rolling cov(asset, market) / rolling var(market).
    df['rolling_beta'] = (
        df['asset'].rolling(window).cov(df['market'])
        / df['market'].rolling(window).var()
    )

    X, model_ols, model_wls, model_ff3 = _fit_models(df)
    _plot_diagnostics(df, scenario_name, X, model_ols, model_wls, window)
    _print_report(df, scenario_name, model_ols, model_wls, model_ff3)


def _fit_models(df, min_abs_market=1e-8):
    """Fit the OLS, WLS and simulated three-factor regressions on *df*."""
    y = df['asset']
    X = sm.add_constant(df['market'])
    model_ols = sm.OLS(y, X).fit()

    # Simple heteroskedasticity weighting: 1 / |market return|. The
    # denominator is floored so an exactly-zero market return cannot produce
    # an infinite weight.
    weights = 1 / np.abs(df['market']).clip(lower=min_abs_market)
    model_wls = sm.WLS(y, X, weights=weights).fit()

    # Fama-French style extension with simulated size and value factors.
    # Size the draws by the frame itself, not a module constant, so frames of
    # any length work.
    n_obs = len(df)
    df['SMB'] = np.random.normal(0, 0.03, n_obs)  # simulated size factor
    df['HML'] = np.random.normal(0, 0.03, n_obs)  # simulated value factor
    X_ff3 = sm.add_constant(df[['market', 'SMB', 'HML']])
    model_ff3 = sm.OLS(y, X_ff3).fit()

    return X, model_ols, model_wls, model_ff3


def _plot_diagnostics(df, scenario_name, X, model_ols, model_wls, window):
    """Draw the 2x2 diagnostic figure (betas, residuals, cum. returns, fits)."""
    # Explicit axes everywhere: DataFrame.plot() without ``ax`` opens a new
    # figure, which would pull panels out of this grid.
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    ax_beta, ax_resid, ax_cum, ax_reg = axes.ravel()

    # Panel 1: true beta versus rolling estimate.
    df['true_beta'].plot(ax=ax_beta, label='True Beta', lw=2)
    df['rolling_beta'].plot(
        ax=ax_beta, label=f'{window}M Rolling Beta', ls='--'
    )
    if 'structural_break' in scenario_name:
        # Mark the last observation before true_beta first changes, found
        # from the data rather than a fixed row number.
        jumps = np.flatnonzero(np.diff(df['true_beta'].to_numpy()) != 0)
        if jumps.size:
            ax_beta.axvline(
                df.index[jumps[0]], color='r', linestyle=':',
                label='Break Point',
            )
    ax_beta.set_title(f'Beta Dynamics: {scenario_name}')
    ax_beta.legend()

    # Panel 2: residual diagnostics for the OLS fit.
    ax_resid.scatter(model_ols.predict(X), model_ols.resid, alpha=0.6)
    ax_resid.axhline(0, color='red')
    ax_resid.set_title('Residuals vs Fitted Values')
    ax_resid.set_xlabel('Predicted')
    ax_resid.set_ylabel('Residuals')

    # Panel 3: cumulative growth of 1 unit invested in market and asset.
    (1 + df[['market', 'asset']]).cumprod().plot(ax=ax_cum)
    ax_cum.set_title('Cumulative Returns Comparison')

    # Panel 4: scatter with both fitted lines. Sort by the regressor so each
    # line is drawn once, left to right.
    X_sorted = X.sort_values('market')
    ax_reg.scatter(df['market'], df['asset'], alpha=0.6)
    ax_reg.plot(
        X_sorted['market'], model_ols.predict(X_sorted),
        color='red', label='OLS',
    )
    ax_reg.plot(
        X_sorted['market'], model_wls.predict(X_sorted),
        color='green', ls='--', label='WLS',
    )
    ax_reg.set_title('Regression Lines Comparison')
    ax_reg.legend()

    fig.tight_layout()
    plt.show()


def _print_report(df, scenario_name, model_ols, model_wls, model_ff3):
    """Print the regression summaries and the rolling-beta t-test."""
    print(f"\n=== {scenario_name.upper()} SCENARIO ANALYSIS ===")
    print("\n1. OLS Regression Results:")
    print(model_ols.summary())
    print("\n2. WLS Regression Results (处理异方差性):")
    print(model_wls.summary())
    print("\n3. Fama-French三因子模型:")
    print(model_ff3.summary())
    print("\n4. Beta假设检验:")

    # NOTE: consecutive rolling betas share most of their window, so the
    # sample is not independent and the p-value is only indicative.
    rolling_betas = df['rolling_beta'].dropna()
    if len(rolling_betas) < 2:
        # Fewer rows than the window leaves nothing to test.
        print("H0: Beta=1.5 | not enough rolling-beta observations for a t-test")
        return
    t_stat, p_value = ttest_1samp(rolling_betas, popmean=1.5)
    print(f"H0: Beta=1.5 | t-stat={t_stat:.2f} | p-value={p_value:.3f}")
# ======================
# Run the main program
# ======================
# Generate every scenario once, then analyse them in insertion order.
dfs = generate_returns()
for scenario_name in dfs:
    df = dfs[scenario_name]
    analyze_data(df, scenario_name)