投资学是我上个学期学的东西了,不过当时并没有完全弄懂整个流程。近几天我的一个朋友(无中生友)找我帮忙做课设,我就尝试做了一番。
背景知识
投资组合理论,简单解释就是研究两只及以上股票(或其他资产),每只股票买多少才能最赚钱,或者效用最大,抑或是风险最小,这三种情况就对应着最大化夏普比率、最大化效用函数、最小化风险,有这些公式,其中$$w_i$$是第i只股票购买的权重(权重可小于0,代表做空):
收益:E(R_p)=\sum_{i}w_iE(R_i)
风险:\sigma_p^2=\sum_{i}\sum_{j}w_i w_j Cov(i,j)
夏普比率:S=\frac{E(R_p)}{\sigma_p}
效用:U=E(R_p)-\frac{1}{2}A\sigma_p^2
获取数据
看了下baostock的api参考,写了如下代码:
def bs_login():
lg = bs.login()
return [lg.error_code, lg.error_msg]
def getKdata(codes, sdate, edate, f):
result = pd.DataFrame()
for i in range(len(codes)):
code = codes[i]
rs = bs.query_history_k_data_plus(code,
"date,pctChg",
start_date=sdate, end_date=edate, frequency=f, adjustflag='2')
data_list = []
while (rs.error_code == '0') & rs.next():
data_list.append(rs.get_row_data())
if i == 0:
result = pd.DataFrame(data_list, columns=rs.fields)
result.rename(columns={'pctChg': code}, inplace=True)
else:
temp = pd.DataFrame(data_list, columns=rs.fields)
temp.rename(columns={'pctChg': code}, inplace=True)
result = pd.merge(result, temp, left_on='date', right_on='date')
resultList = [[float(s) for s in l] for l in result.values.T.tolist()[1:]]
print(resultList)
x = {}
for i in range(len(codes)):
code = codes[i]
x[code] = resultList[i]
tempResult = pd.DataFrame(x, columns=codes)
return tempResult
在其他地方执行这两个函数:
bs_login()
# args = sys.argv
args = ['', 'sz.399300,sz.300657,sh.603533,sz.300391,sh.600130,sh.600519', '2019-01-01', '2020-01-01', 'm']
最小方差组合
顾名思义,最小方差组合就是能使方差最小的投资组合,首先写出求方差的函数:
def varP(w):
global data
covariance = np.cov(data.values.T)
return w.T.dot(covariance).dot(w)
然后用scipy的optimize模块优化
def solveMinVariance():
global n_stocks, ers
w0 = np.zeros(n_stocks)
w0[0] = 1
cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
optv = sp.optimize.minimize(varP, w0, method='SLSQP', constraints=cons)
w = optv.x
erp = 0
for i in range(n_stocks):
er = ers[i]
erp += er*w[i]
sigmaP2 = varP(w)
sharpeP = sP(w)
return [w.tolist(), erp, sigmaP2, -sharpeP]
最优风险组合
顾名思义,最优风险组合意思就是最优秀的风险组合,什么是“优秀”呢?在这里的“优秀”,指的其实就是夏普比率高,也就是高收益低风险。
全协方差模型
类似的思路,先写出求夏普比率的函数
def sP(w):
global data, n_stocks, rf
erp = 0
for i in range(n_stocks):
er = ers[i]
erp += er*w[i]
covariance = np.cov(data.values.T)
return -(erp-rf)/np.sqrt(w.T.dot(covariance).dot(w))
同样地使用scipy的optimize优化,这里为了避免弄成局部最优解,使用了$$E(R_i)/\sigma_i$$作初值
def solveMaxSharpe():
global n_stocks
w0 = np.zeros(n_stocks)
w0[0] = 0.01
for i in range(1, n_stocks):
w0[i] = was[i - 1]
wbeforeSum = np.sum(w0)
for i in range(n_stocks):
w0[i] = w0[i]/wbeforeSum
cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
#bnds = tuple((0, 1) for x in range(n_stocks))
optv = sp.optimize.minimize(sP, w0, method='SLSQP', constraints=cons)
w = optv.x
erp = 0
for i in range(n_stocks):
er = ers[i]
erp += er*w[i]
sigmaP2 = varP(w)
sharpeP = sP(w)
return [w.tolist(), erp, sigmaP2, -sharpeP]
单指数模型
大家可能都发现了,上面写了个词叫“全协方差模型”,这个模型的步骤太少了,为了帮朋友完成课设,这么点字估计不行。因此我就盯上了基于CAPM模型的单指数模型的最优化过程,这个步骤就多了,但计算量没全协方差模型大。不过最终优化得出的夏普比率没有它高,但也十分接近了。
def singleIndex():
print('单指数模型计算过程:')
print(' E(RM)=', erm)
print(' sigma(M)=', sigmaM)
print(' 模型估计alpha值:', alphas)
print(' 模型估计beta值:', betas)
print(' 模型估计残差方差:', sigmas)
print(' 计算原始头寸:', was)
sumwas = np.sum(was)
for i in range(len(was)):
was[i] = was[i]/sumwas
print(' 调整组合权重:', was)
alphaA = np.array(was).T.dot(np.array(alphas))
print(' 计算积极组合的alpha值:', alphaA)
sigmaA = (np.array(was)**2).T.dot(np.array(sigmas))
print(' 计算积极组合的残差的方差:', sigmaA)
wA = alphaA/sigmaA/(erm/sigmaM)
print(' 计算积极组合的原始头寸:',wA)
betaA = np.array(was).T.dot(np.array(betas))
print(' 计算积极组合的beta值:',betaA)
wAstar = abs(wA/(1+(1-betaA)*wA))
print(' 调整积极组合的原始头寸:',wAstar)
wMstar = 1 - wAstar
print(' 此时最优风险组合的权重:',wMstar)
ws = [wMstar]
for s in np.array(was).dot(wAstar):
ws.append(s)
ws = np.array(ws)
sharpeRatio = -sP(ws)
varianceAll = varP(ws)
er = 0
for i in range(n_stocks):
e = ers[i] - rf
er += e*ws[i]
print(' 计算最优风险组合的风险溢价:',er/100)
print(' 计算最优风险组合的收益率:', (er+rf)/100)
print(' 计算最优风险组合的收益率(换算为年):', (1+(er+rf)/100)**dataTimetoYear-1)
print(' 计算最优风险组合的组合方差:',varianceAll)
print(' 计算夏普比率:', sharpeRatio*np.sqrt(dataTimetoYear))
yStarS = (er + rf)/(A*varianceAll)
print(' 最优完全资产组合中最优风险组合的比重(风险厌恶系数A=3,无风险利率rf=',rf,'):', yStarS)
print(' ')
全部代码
import baostock as bs
import pandas as pd
import numpy as np
import scipy as sp
import scipy.optimize as opt
import sys
import statsmodels.api as sm
def bs_login():
lg = bs.login()
return [lg.error_code, lg.error_msg]
def getKdata(codes, sdate, edate, f):
result = pd.DataFrame()
for i in range(len(codes)):
code = codes[i]
rs = bs.query_history_k_data_plus(code,
"date,pctChg",
start_date=sdate, end_date=edate, frequency=f, adjustflag='2')
data_list = []
while (rs.error_code == '0') & rs.next():
data_list.append(rs.get_row_data())
if i == 0:
result = pd.DataFrame(data_list, columns=rs.fields)
result.rename(columns={'pctChg': code}, inplace=True)
else:
temp = pd.DataFrame(data_list, columns=rs.fields)
temp.rename(columns={'pctChg': code}, inplace=True)
result = pd.merge(result, temp, left_on='date', right_on='date')
resultList = [[float(s) for s in l] for l in result.values.T.tolist()[1:]]
print(resultList)
x = {}
for i in range(len(codes)):
code = codes[i]
x[code] = resultList[i]
tempResult = pd.DataFrame(x, columns=codes)
return tempResult
def varP(w):
global data
covariance = np.cov(data.values.T)
return w.T.dot(covariance).dot(w)
def sP(w):
global data, n_stocks, rf
erp = 0
for i in range(n_stocks):
er = ers[i]
erp += er*w[i]
covariance = np.cov(data.values.T)
return -(erp-rf)/np.sqrt(w.T.dot(covariance).dot(w))
# minimum variance portfolio
def solveMinVariance():
global n_stocks, ers
w0 = np.zeros(n_stocks)
w0[0] = 1
cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
optv = sp.optimize.minimize(varP, w0, method='SLSQP', constraints=cons)
w = optv.x
erp = 0
for i in range(n_stocks):
er = ers[i]
erp += er*w[i]
sigmaP2 = varP(w)
sharpeP = sP(w)
return [w.tolist(), erp, sigmaP2, -sharpeP]
# maximum sharpe ratio
def solveMaxSharpe():
global n_stocks
w0 = np.zeros(n_stocks)
w0[0] = 0.01
for i in range(1, n_stocks):
w0[i] = was[i - 1]
wbeforeSum = np.sum(w0)
for i in range(n_stocks):
w0[i] = w0[i]/wbeforeSum
cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
#bnds = tuple((0, 1) for x in range(n_stocks))
optv = sp.optimize.minimize(sP, w0, method='SLSQP', constraints=cons)
w = optv.x
erp = 0
for i in range(n_stocks):
er = ers[i]
erp += er*w[i]
sigmaP2 = varP(w)
sharpeP = sP(w)
return [w.tolist(), erp, sigmaP2, -sharpeP]
def singleIndex():
print('单指数模型计算过程:')
print(' E(RM)=', erm)
print(' sigma(M)=', sigmaM)
print(' 模型估计alpha值:', alphas)
print(' 模型估计beta值:', betas)
print(' 模型估计残差方差:', sigmas)
print(' 计算原始头寸:', was)
sumwas = np.sum(was)
for i in range(len(was)):
was[i] = was[i]/sumwas
print(' 调整组合权重:', was)
alphaA = np.array(was).T.dot(np.array(alphas))
print(' 计算积极组合的alpha值:', alphaA)
sigmaA = (np.array(was)**2).T.dot(np.array(sigmas))
print(' 计算积极组合的残差的方差:', sigmaA)
wA = alphaA/sigmaA/(erm/sigmaM)
print(' 计算积极组合的原始头寸:',wA)
betaA = np.array(was).T.dot(np.array(betas))
print(' 计算积极组合的beta值:',betaA)
wAstar = abs(wA/(1+(1-betaA)*wA))
print(' 调整积极组合的原始头寸:',wAstar)
wMstar = 1 - wAstar
print(' 此时最优风险组合的权重:',wMstar)
ws = [wMstar]
for s in np.array(was).dot(wAstar):
ws.append(s)
ws = np.array(ws)
sharpeRatio = -sP(ws)
varianceAll = varP(ws)
er = 0
for i in range(n_stocks):
e = ers[i] - rf
er += e*ws[i]
print(' 计算最优风险组合的风险溢价:',er/100)
print(' 计算最优风险组合的收益率:', (er+rf)/100)
print(' 计算最优风险组合的收益率(换算为年):', (1+(er+rf)/100)**dataTimetoYear-1)
print(' 计算最优风险组合的组合方差:',varianceAll)
print(' 计算夏普比率:', sharpeRatio*np.sqrt(dataTimetoYear))
yStarS = (er + rf)/(A*varianceAll)
print(' 最优完全资产组合中最优风险组合的比重(风险厌恶系数A=3,无风险利率rf=',rf,'):', yStarS)
print(' ')
bs_login()
# args = sys.argv
args = ['', 'sz.399300,sz.300657,sh.603533,sz.300391,sh.600130,sh.600519', '2019-01-01', '2020-01-01', 'm']
dataTimetoYear = 365
if args[4] == 'd':
dataTimetoYear = 365
if args[4] == 'm':
dataTimetoYear = 12
if args[4] == 'w':
dataTimetoYear = 48
n_stocks = len(args[1].split(','))
data = getKdata(args[1].split(','), args[2], args[3], args[4])
ers = [np.mean(data.values.T[i]) for i in range(n_stocks)]
A = 3
rf = 3/dataTimetoYear
x = data.values.T[0] - rf
X = sm.add_constant(x)
erm = np.mean(x)
sigmaM = np.cov(x)
alphas = []
betas = []
sigmas = []
was = []
for i in range(len(data.values.T) - 1):
y = data.values.T[i+1] - rf
model = sm.OLS(y, X)
results = model.fit()
alphas.append(results.params[0])
betas.append(results.params[1])
sigmas.append(np.cov(results.resid).tolist())
was.append(alphas[i]/sigmas[i])
resultMV = solveMinVariance()
resultMS = solveMaxSharpe()
yStar = (resultMS[1] - rf)/(A*resultMS[2])
print('全协方差模型(最小化方差)')
print(' 计算最小方差组合权重:', resultMV[0])
print(' 计算最小方差组合收益率:', resultMV[1]/100)
print(' 计算最小方差组合收益率(换算为年):', (1+resultMV[1]/100)**dataTimetoYear-1)
print(' 计算最小方差组合方差:', resultMV[2])
print(' 计算最小方差组合夏普比率:', resultMV[3]*np.sqrt(dataTimetoYear))
print(' ')
print('全协方差模型(最大化夏普比率)')
print(' 计算最优风险组合权重:', resultMS[0])
print(' 计算最优风险组合收益率:', resultMS[1]/100)
print(' 计算最优风险组合收益率(换算为年):', (1+resultMS[1]/100)**dataTimetoYear-1)
print(' 计算最优风险组合方差:', resultMS[2])
print(' 计算最优风险组合夏普比率:', resultMS[3]*np.sqrt(dataTimetoYear))
print(' 计算最优完全资产组合中最优风险组合的比重(风险厌恶系数A=3,无风险利率rf=',rf,'):', yStar)
print(' ')
singleIndex()
data.to_excel('test.xlsx')