import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import scipy as sp
import scipy.linalg 
import scipy.stats


import pandas_datareader as pr
from pandas_datareader import data


# Date window for the download, written as month/day/year strings.
start_date = '6/30/2010'
end_date = '10/24/2020'
# Fetch the '^GSPC' series from the 'yahoo' source; later cells read its 'Close' column.
Yahoo = data.DataReader(
    '^GSPC',
    'yahoo',
    start_date,
    end_date,
)


# Closing level against the date index, drawn as a single line trace.
plotarray = [
    go.Scatter(
        x=Yahoo.index,
        y=Yahoo['Close'],
        mode='lines',
        name='Index Value',
    ),
]
# Layout as a plain dict: log-scaled y axis, legend shown.
figlayout = dict(
    title="S&P 500 Index",
    yaxis=dict(title='Log Scale', type='log'),
    showlegend=True,
)
fig = go.Figure(data=plotarray, layout=figlayout)

fig.show()


# Log return per row: log of the close over the previous row's close.
# The first row has no predecessor, so its value is NaN.
Yahoo['logReturn'] = np.log(Yahoo['Close'].div(Yahoo['Close'].shift(1)))
Yahoo['logReturn'].describe()

count    2598.000000
mean        0.000467
std         0.010981
min        -0.127652
25%        -0.003382
50%         0.000653
75%         0.005264
max         0.089683
Name: logReturn, dtype: float64


# Vector of log returns; the first row is skipped because it has no previous close.
rets = np.array(Yahoo['logReturn'].iloc[1:])


pd.DataFrame(rets).describe()


# scipy's summary as a plain dict, with the (min, max) pair split into two entries.
x = scipy.stats.describe(rets)
y = dict(x._asdict())
y["min"], y["max"] = y.pop("minmax")
y

{'nobs': 2598,
 'mean': 0.00046673491230296213,
 'variance': 0.00012057369088690606,
 'skewness': -0.9067317822033426,
 'kurtosis': 17.749492855510095,
 'min': -0.12765219747281742,
 'max': 0.08968323251796326}


#The GBM model takes as inputs the parameters theta = (r, sigma) and the matrix zt of i.i.d. shocks.
#It returns the log-prices and the log-returns, in that order. One series would have sufficed as output, however.


#Quick calibration (bird's eye view) of GBM parameters:
# Volatility: standard deviation of the log returns (numpy default, ddof=0).
sigma = rets.std()
# Drift: mean log return plus half the variance.
r = rets.mean() + 0.5 * sigma**2
print(f'r={round(r,5)}, sigma={round(sigma,5)}')

r=0.00053, sigma=0.01098


# Simulation dimensions: N paths, one step per observed log return.
N = 100
T = len(rets)

theta = [r, sigma]  # GBM parameters: drift and volatility
zt = npr.standard_normal((T, N))  # standard-normal shocks, one column per path

# The first close in the sample is the starting level. Explicit positional
# access is used because integer keys passed to [] on a date-indexed Series
# rely on a deprecated positional fallback in pandas.
S0 = Yahoo['Close'].iloc[0]


#Simulate N GBM paths: 
def GBM(theta, zt, s0=None):
    """Simulate log-prices and log-returns of a geometric Brownian motion.

    Each step's log-return is ``(theta[0] - theta[1]**2 / 2) + theta[1] * z``,
    and log-prices are the running sum of those returns added to the log of
    the starting level.

    Parameters
    ----------
    theta : sequence of two floats
        ``theta[0]`` is the drift and ``theta[1]`` the volatility per step.
    zt : array-like, shape (T, N)
        Shock matrix: T steps (rows) for N paths (columns).
    s0 : float, optional
        Starting price level. When omitted, the module-level ``S0`` is read
        at call time, which is what the two-argument form has always done.

    Returns
    -------
    S_log : ndarray, shape (T + 1, N)
        Log-prices; row 0 holds ``log(s0)`` for every path.
    log_Rt : ndarray, shape (T, N)
        Per-step log-returns.
    """
    if s0 is None:
        # Backward-compatible fallback to the global starting level.
        s0 = S0
    zt = np.asarray(zt, dtype=float)
    n_steps, n_paths = zt.shape
    drift, vol = theta[0], theta[1]

    log_Rt = (drift - vol**2 / 2) + vol * zt

    log_s0 = np.log(s0)
    S_log = np.empty((n_steps + 1, n_paths))
    S_log[0, :] = log_s0
    S_log[1:] = log_s0 + np.cumsum(log_Rt, axis=0)
    return S_log, log_Rt


#Simulate the calibrated GBM path:

S_log, log_Rt = GBM(theta, zt)

# Convert simulated log-prices back to price levels.
S = np.exp(S_log)

# One thin line per simulated path.
plt.plot(S, lw='0.5')
plt.ylabel('Bird Eye Calibration')
plt.title('GBM Model - linear scale')
plt.grid(True)


# Growth of the average terminal price over the starting level, per period:
# once as a log rate and once as a compounded rate.
LOG = np.log(S[-1, :].mean() / S0) / T
CAGR = (S[-1, :].mean() / S0) ** (1 / T) - 1
print(round(LOG, 5), round(CAGR, 5))

0.00053 0.00053


#Plotting series is easy with matplotlib:

plt.plot(S_log, lw='0.5')
plt.title('GBM Model - logarithmic scale')
plt.xlabel('# periods')
plt.ylabel('Bird Eye Calibration')
plt.grid(True)


#Plotting with plotly is much more difficult and time-consuming, but the result may be well worth the pain.

# One thin line trace per simulated path, plotted against the date index.
plotarray = [
    go.Scatter(x=Yahoo.index, y=S[:, i], mode='lines', line=dict(width=1))
    for i in range(N)
]

figlayout = dict(
    title="GBM Model",
    yaxis=dict(title='Log Scale', type='log'),
    showlegend=False,
)
fig = go.Figure(data=plotarray, layout=figlayout)
fig.show()


# Histogram of the last row of S, i.e. the terminal value of every path.
plotarray = [
    go.Histogram(x=S[-1, :], nbinsx=20),
]
figlayout = dict(title='Distribution of Terminal Values')
fig = go.Figure(data=plotarray, layout=figlayout)
fig.show()


#Simulate/Forecast stock prices over the next year:

# Start the forecast from the last close in the sample. Explicit positional
# access avoids the deprecated integer-key fallback on a date-indexed Series.
# Note that this rebinds the global S0 that GBM reads.
S0 = Yahoo['Close'].iloc[-1]
Tf = 252  # number of days looking forward
theta = [r, sigma]

z = npr.standard_normal((Tf, N))  # fresh shocks: Tf steps for N paths

S_logf, log_Rtf = GBM(theta, z)

Sf = np.exp(S_logf)  # forecast price levels, shape (Tf + 1, N)


# One thin line per forecast path.
plt.plot(Sf, lw='0.5')
plt.ylabel('Bird Eye Calibration')
plt.title('GBM Model Forecast')
plt.grid(True)


# One line trace per forecast path; no x is given, so plotly uses the row number.
plotarray = [
    go.Scatter(y=Sf[:, i], mode='lines', line=dict(width=1.5))
    for i in range(N)
]

figlayout = dict(
    title='GBM Model Forecast',
    yaxis=dict(title='Log Scale', type='log'),
    showlegend=False,
)
fig = go.Figure(data=plotarray, layout=figlayout)

fig.show()


import scipy.stats
# Summary of the terminal forecast values. The last row is addressed as -1
# rather than a literal 252, so it stays correct if the horizon Tf changes.
scipy.stats.describe(Sf[-1, :])

DescribeResult(nobs=100, minmax=(2580.4759499861802, 6679.502792970043), mean=3990.053299610971, variance=464090.22175737814, skewness=0.8224709262643638, kurtosis=1.5679353341429003)


# Mean and median simple return over the forecast horizon, relative to S0.
# The terminal row is addressed as -1 instead of the literal 252, so the
# figures follow Tf if the horizon is changed.
meanret1y = np.mean(Sf[-1, :]) / S0 - 1
medianret1y = np.median(Sf[-1, :]) / S0 - 1
print('1y Mean Return','1y Median Return')
print(f'{round(meanret1y*100,2)}%,         {round(medianret1y*100,2)}%')

1y Mean Return 1y Median Return
15.14%,         12.75%


# The distribution of total returns at the end of the forecast horizon.
# The terminal row is addressed as -1 instead of the literal 252.
OneYearTR = Sf[-1, :] / S0 - 1

TRf = pd.DataFrame(OneYearTR)
TRf.describe()


#Next, I attempt to obtain more exact GBM parameters by minimizing the squared difference between observed index values and GBM-predicted values, using the bird's-eye parameters above as the starting point.

import scipy as sp
from scipy import stats


# Earlier draft of the objective, kept for reference:
# ordd = 2
# z = npr.standard_normal(rets.shape) 
# f = lambda theta: (theta[0]-theta[1]**2/2)+theta[1]*z #minimize the norm for each return

#g = lambda theta: np.linalg.norm(rets-f(theta), ord=ordd)**ordd
N = 100
T = len(rets)
# Shock matrix drawn once and reused by every objective evaluation.
zopt = npr.standard_normal((T, N))

# Synthetic target: the first simulated return path under the bird's-eye parameters.
_, test = GBM([r, sigma], zopt)
test = test[:, 0]

def g(theta):
    """Sum of squared errors between observed and simulated log returns.

    Simulates returns with ``GBM(theta, zopt)`` and, for every simulated
    path (column), adds the squared Euclidean distance to the observed
    vector ``rets``.

    Parameters
    ----------
    theta : sequence of two floats
        Candidate GBM parameters, passed straight to ``GBM``.

    Returns
    -------
    float
        Squared error accumulated over all simulated paths.
    """
    _, ret = GBM(theta, zopt)
    # Iterate over the simulated columns themselves rather than range(N), so
    # the objective cannot fall out of sync with a later reassignment of N.
    # Paths are accumulated in column order, as before.
    return sum(np.linalg.norm(rets - path) ** 2 for path in ret.T)


# Show the bird's-eye estimates at full precision.
print(r, sigma)

0.0005269985526480921 0.010978491731119528


import scipy.optimize


# Synthetic target series: simulate returns under the bird's-eye parameters
# with the fixed shocks zopt, then keep only the first path (column 0).
_, test = GBM([r, sigma], zopt)
test = test[:, 0]

def g(theta):
    """Sum of squared errors between the synthetic target and simulated returns.

    Simulates returns with ``GBM(theta, zopt)`` and, for every simulated
    path (column), adds the squared Euclidean distance to the vector
    ``test``.

    Parameters
    ----------
    theta : sequence of two floats
        Candidate GBM parameters, passed straight to ``GBM``.

    Returns
    -------
    float
        Squared error accumulated over all simulated paths.
    """
    _, ret = GBM(theta, zopt)
    # Iterate over the simulated columns themselves rather than range(N), so
    # the objective cannot fall out of sync with a later reassignment of N.
    # Paths are accumulated in column order, as before.
    return sum(np.linalg.norm(test - path) ** 2 for path in ret.T)

# Downhill-simplex search (scipy's fmin) started from the bird's-eye estimates.
thetatest = scipy.optimize.fmin(
    g,
    [r, sigma],
    xtol=1e-7,
    ftol=1e-7,
)
print('Parameter calibration for retsle prices', thetatest)

Optimization terminated successfully.
         Current function value: 30.774098
         Iterations: 47
         Function evaluations: 91
Parameter calibration for retsle prices [0.00020864 0.00013746]


# NOTE(review): at this point `g` is the most recent definition, which scores
# against the synthetic `test` path, and this call has the same arguments as
# the one that produced `thetatest`, so it repeats that optimisation. If a fit
# to the observed `rets` was intended, the rets-based objective must be the
# one in scope here; please confirm.
thetaopt = scipy.optimize.fmin(func=g, x0=[r,sigma],ftol=1e-7,xtol=1e-7)
# Alternative kept for reference: coarse grid search over both parameters.
# thetaopt = scipy.optimize.brute(func=g, ranges=(slice(-1,1, 0.1), slice(0,1, 0.1)), finish=None)
print('Parameter calibration for retsle prices', thetaopt)

Optimization terminated successfully.
         Current function value: 30.774098
         Iterations: 47
         Function evaluations: 91
Parameter calibration for retsle prices [0.00020864 0.00013746]


# Objective value divided by N, first at the bird's-eye parameters and then
# at the optimiser's result.
print(g([r, sigma])/N)
print(g(thetaopt)/N)

0.6125232992355399
0.3077409791071249


# Observed log-return series, plotted against observation number.
plt.plot(rets)

[<matplotlib.lines.Line2D at 0x1a1c52d828>]


# Alternative kept for reference: simulate with the bird's-eye parameters instead.
# _, tmp  = GBM([r, sigma], zopt)
_, tmp  = GBM(thetaopt, zopt)
# Absolute gap between the observed returns and the first simulated path.
plt.plot(np.abs(rets-tmp[:, 0]))

[<matplotlib.lines.Line2D at 0x1a1c837748>]


# Observed returns (x) against the first simulated path (y), as red star markers.
plt.plot(rets,tmp[:, 0], 'r*')

[<matplotlib.lines.Line2D at 0x1a1c690b38>]


#Simulate the calibrated GBM path:
N = 100
#r = thetaopt[0]
#sigma = thetaopt[1]
T = len(rets)
# GBM builds and returns its own arrays, so S_cont needs no pre-allocation.
S_cont, log_Rt = GBM(thetaopt, zopt)
S_contM = np.exp(S_cont)  # price levels from the simulated log-prices

# Only the first simulated path is drawn here.
plt.plot(S_contM[:,0])
plt.ylabel('Fmin Calibration')
plt.grid(True)


# All simulated log-price paths from the fmin parameters.
plt.plot(S_cont)
plt.title('Logarithmic scale')
plt.xlabel('# periods')
plt.ylabel('F min Calibration')
plt.grid(True)


# Realised total return of the index over the sample: last close over first
# close, using explicit positional access on the date-indexed Series.
rets_TR = Yahoo['Close'].iloc[-1] / Yahoo['Close'].iloc[0] - 1
# Simulated counterpart: mean terminal level relative to the level the paths
# actually start from (row 0 is identical for every path), rather than a
# hard-coded base of 100.
print(rets_TR, np.mean(S_contM[-1]) / S_contM[0, 0] - 1)

2.362138743110739 58.625220673781


#Normality test: Shapiro-Wilk

from scipy.stats import shapiro
stat, p = shapiro(rets)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# Interpret at the 5% level: H0 (normality) is rejected unless p exceeds alpha.
alpha = 0.05
print(
    'Sample looks Gaussian (fail to reject H0)'
    if p > alpha
    else 'Sample does not look Gaussian (reject H0)'
)

Statistics=0.854, p=0.000
Sample does not look Gaussian (reject H0)


#Normality test: Jarque-Bera

JB, p = stats.jarque_bera(rets)
print('Statistics=%.3f, p=%.3f' % (JB, p))
# Interpret at the 5% level: H0 (normality) is rejected unless p exceeds alpha.
alpha = 0.05
print(
    'Sample looks Gaussian (fail to reject H0)'
    if p > alpha
    else 'Sample does not look Gaussian (reject H0)'
)

Statistics=34459.563, p=0.000
Sample does not look Gaussian (reject H0)


# Jarque-Bera statistic assembled by hand from sample skewness and excess kurtosis.
# Note: this rebinds S, which previously held the simulated price matrix.
n = len(rets)
S = sp.stats.skew(rets)
EK = sp.stats.kurtosis(rets, fisher=True)  # fisher=True gives excess kurtosis
JB = n / 6 * (S**2 + EK**2 / 4)  #JB proves non-normality


# Standard deviation of log returns over a rolling 252-row window.
rollVol = Yahoo["logReturn"].rolling(window=252).std()
plt.plot(rollVol, 'r.')
plt.ylabel('Rolling annual standard deviation')
plt.title('Volatility is non-constant \n and appears mean-reverting')

Text(0.5, 1.0, 'Volatility is non-constant \n and appears mean-reverting')


#Higher volatility is associated with falling prices:
# Mean log return over the same rolling 252-row window.
rollmean = Yahoo["logReturn"].rolling(window=252).mean()
plt.plot(rollmean, 'r.')
plt.ylabel('Rolling Annual Return')
plt.title('Higher volatility is associated with lower returns.')

Text(0.5, 1.0, 'Higher volatility is associated with lower returns.')


# Scatter of rolling return (y) against rolling volatility (x). The axis
# labels follow the plotted data: rollVol is the x argument and rollmean
# the y argument.
plt.plot(rollVol, rollmean, 'r.')
plt.xlabel('Rolling Annual Volatility')
plt.ylabel('Rolling Annual Return')
plt.title('Or are they uncorrelated?')

Text(0.5, 1.0, 'Or are they uncorrelated?')


import statsmodels.api as sm
# Drop the leading NaNs produced by the rolling windows.
Y = rollVol.dropna()
XX = rollmean.dropna()
# Design matrix: an intercept column next to the rolling mean return.
X = np.vstack([
    np.ones_like(Y),
    XX,
]).T


# Regress rolling volatility on an intercept and the rolling mean return.
ols_model = sm.OLS(endog=Y, exog=X)
ols_results = ols_model.fit()
print(ols_results.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:              logReturn   R-squared:                       0.209
Model:                            OLS   Adj. R-squared:                  0.209
Method:                 Least Squares   F-statistic:                     620.2
Date:                Sun, 25 Oct 2020   Prob (F-statistic):          1.14e-121
Time:                        17:11:14   Log-Likelihood:                 9983.2
No. Observations:                2347   AIC:                        -1.996e+04
Df Residuals:                    2345   BIC:                        -1.995e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0118      0.000    103.863      0.000       0.012       0.012
x1            -5.5433      0.223    -24.905      0.000      -5.980      -5.107
==============================================================================
Omnibus:                      987.154   Durbin-Watson:                   0.008
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             3824.437
Skew:                           2.098   Prob(JB):                         0.00
Kurtosis:                       7.636   Cond. No.                     3.13e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.13e+03. This might indicate that there are
strong multicollinearity or other numerical problems.


# AR(1) check: regress each return on an intercept and the previous return.
y = rets[1:]
x = np.vstack([
    np.ones_like(y),  # intercept
    rets[:-1],        # lag 1
]).T
ols_model = sm.OLS(y, x)
ols_results = ols_model.fit()
print(ols_results.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.026
Model:                            OLS   Adj. R-squared:                  0.026
Method:                 Least Squares   F-statistic:                     70.09
Date:                Sun, 25 Oct 2020   Prob (F-statistic):           9.15e-17
Time:                        17:11:14   Log-Likelihood:                 8066.4
No. Observations:                2597   AIC:                        -1.613e+04
Df Residuals:                    2595   BIC:                        -1.612e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0005      0.000      2.554      0.011       0.000       0.001
x1            -0.1622      0.019     -8.372      0.000      -0.200      -0.124
==============================================================================
Omnibus:                      937.223   Durbin-Watson:                   1.971
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            25824.370
Skew:                          -1.110   Prob(JB):                         0.00
Kurtosis:                      18.288   Cond. No.                         91.1
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


# AR(2) check: regress each return on an intercept and the two previous returns.
y = rets[2:]
x = np.vstack([
    np.ones_like(y),  # intercept
    rets[1:-1],       # lag 1
    rets[0:-2],       # lag 2
]).T
ols_model = sm.OLS(y, x)
ols_results = ols_model.fit()
print(ols_results.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.034
Model:                            OLS   Adj. R-squared:                  0.033
Method:                 Least Squares   F-statistic:                     45.87
Date:                Sun, 25 Oct 2020   Prob (F-statistic):           2.65e-20
Time:                        17:11:14   Log-Likelihood:                 8073.4
No. Observations:                2596   AIC:                        -1.614e+04
Df Residuals:                    2593   BIC:                        -1.612e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0005      0.000      2.342      0.019     8.1e-05       0.001
x1            -0.1477      0.020     -7.550      0.000      -0.186      -0.109
x2             0.0898      0.020      4.590      0.000       0.051       0.128
==============================================================================
Omnibus:                      844.028   Durbin-Watson:                   1.999
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            24056.416
Skew:                          -0.929   Prob(JB):                         0.00
Kurtosis:                      17.797   Cond. No.                         99.5
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


# AR(3) check: regress each return on an intercept and the three previous returns.
y = rets[3:]
x = np.vstack([
    np.ones_like(y),  # intercept
    rets[2:-1],       # lag 1
    rets[1:-2],       # lag 2
    rets[0:-3],       # lag 3
]).T
ols_model = sm.OLS(y, x)
ols_results = ols_model.fit()
print(ols_results.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.034
Model:                            OLS   Adj. R-squared:                  0.033
Method:                 Least Squares   F-statistic:                     30.58
Date:                Sun, 25 Oct 2020   Prob (F-statistic):           2.01e-19
Time:                        17:11:14   Log-Likelihood:                 8069.9
No. Observations:                2595   AIC:                        -1.613e+04
Df Residuals:                    2591   BIC:                        -1.611e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0005      0.000      2.345      0.019    8.17e-05       0.001
x1            -0.1470      0.020     -7.484      0.000      -0.186      -0.108
x2             0.0889      0.020      4.495      0.000       0.050       0.128
x3            -0.0064      0.020     -0.324      0.746      -0.045       0.032
==============================================================================
Omnibus:                      850.982   Durbin-Watson:                   1.998
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            24429.576
Skew:                          -0.941   Prob(JB):                         0.00
Kurtosis:                      17.913   Cond. No.                         101.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


# The first two observed log returns, to be used as initial values in the AR(2) model:
print(rets[0], rets[1])

-0.0032457133092441124 -0.004673271754301508


# Last close in the sample. Explicit positional access is used because
# integer keys passed to [] on a date-indexed Series are deprecated.
Yahoo['Close'].iloc[-1]

3465.389892578125

Stochastic Processes for Equity Index Prices

Geometric Brownian Motion process

Expected compounded average return

Parameter estimation

The GBM process proves inadequate for modelling stock prices. Will a Stochastic Volatility model do a better job?

Is there autocorrelation in the stock returns?

	0
count	2598.000000
mean	0.000467
std	0.010981
min	-0.127652
25%	-0.003382
50%	0.000653
75%	0.005264
max	0.089683

	0
count	100.000000
mean	0.151401
std	0.196584
min	-0.255358
25%	0.029261
50%	0.127506
75%	0.265105
max	0.927490