1.模拟一AR(2)模型如下,n=200(模拟250,舍去50) \(x_t=-x_{t-1}-0.5x_{t-2}+\epsilon_t,\epsilon_t i.i.d~N(0,1)\) 利用此模拟序列,建立模型(画出时序图,acf和pacf)。 利用Yule-Walker方程估计\(\phi_1,\phi_2,{\sigma_\epsilon}^2\)。 利用levinson算法写程序估计以上参数。
#模拟序列,建立模型(画出时序图,acf和pacf)
n<-200;n0<-50 #舍去前50数据
a<--1
b<--0.5
set.seed(1234) #方便复现结果
eps <- rnorm(n+n0,0,1)
AR2 <- function(eps,n,n0) {
x=rep(0,n)
x1=rep(0,n+n0)
x1[1:2]=eps[1:2]
for (t in 3:length(eps)) {
x1[t]=a*x1[t-1]+b*x1[t-2]+eps[t]
}
x=x1[(n0+1):(n+n0)]
x<-ts(x)
return(x)
}
X=AR2(eps,n,n0)
#x <- arima.sim(model=list(ar=c(a,b)), n=200)
plot(X, type="l",col="red",xlab="time",ylab="",main=expression(X[t]))
acf(X)
pacf(X)
#利用Yule-Walker方程估计
fit.yw <- ar(X, aic=FALSE, order.max=2,method="yule-walker") #aic:If TRUE(默认值) then the AIC is used to choose the order of the autoregressive model. If FALSE, the model of order order.max is fitted.
cat("利用Yule-Walker方程估计的phi1和phi2是:",fit.yw$ar[1],"和",fit.yw$ar[2],"\n",sep="")
## 利用Yule-Walker方程估计的phi1和phi2是:-0.898588和-0.4048481
cat("利用Yule-Walker方程估计的sigma_epsilon是:",fit.yw$var.pred,sep="")
## 利用Yule-Walker方程估计的sigma_epsilon是:1.025736
#利用levinson算法估计
arma.Wold <- function(n, a, b=numeric(0)){
p <- length(a)
q <- length(b)
arev <- rev(a) #逆序
psi <- numeric(n)
psi[1] <- 1
for(j in seq(n-1)){
if(j <= q) bj=b[j]
else bj=0
psis <- psi[max(1, j+1-p):j]
np <- length(psis)
if(np < p) psis <- c(rep(0,p-np), psis)
psi[j+1] <- bj + sum(arev * psis)
}
psi
}
## Calculate theoretical autocovariance function of ARMA model using Wold expansion
arma.gamma.by.Wold <- function(n, a, b=numeric(0), sigma=1){
nn <- n + 100
psi <- arma.Wold(nn, a, b)
#print(psi)
gam <- numeric(n)
for(ii in seq(0, n-1)){
gam[ii+1] <- sum(psi[1:(nn-ii)] * psi[(ii+1):nn])
}
gam <- (sigma^2) * gam
gam
}
arma.gamma <- arma.gamma.by.Wold
### 对平稳列,已知自协方差列时用Levinson递推计算逐个一步预报系数(Y-W系数)和一步预测误差方差
### 输入: gams[1:n] 为\gamma_k, k=0,1,\dots,n-1
### 输出:预报Y_1, Y_2, \dots, Y_{n}所需的系数和均方误差。
### coef.YW -- (n-1)*(n-1)矩阵,第k行的1:k元素为
### a_{k,1}, a_{k,2}, \dots, a_{k,k},
### 用来预报Y_{k+1}:
### L(Y_{k+1} | Y_1, Y_2, \dots, Y_k)= a_{k,1} Y_{k} + a_{k,2} Y_{k-1} + \dots + a_{k,k} Y_1 k=1,2,\dots,n-1
### L(Y_1| ) = 0
### sigmasq --n向量,预报Y_1,Y_2,\dots,Y_n的一步预报均方误差
### 其1号元素是预报Y_1的均方误差,2号元素是预报Y_2的均方误差,..., n号元素是预报Y_n的均方误差。
#估计gamma(h),因为n较大,采用书7.2.1公式逼近
gamma<-function(h){
gammah<-0
n<-200
for(i in 1:(n-h)){
gammah<-gammah+(X[i]-mean(X))*(X[i+h]-mean(X))
}
return(gammah/n)
}
Levinson.coef <- function(gams){
n <- length(gams)
## ayw保存Y-W系数a[i,j], i=1,2,\dots,n-1, j=1,2,\dots,i
ayw <- matrix(0, n-1, n-1)
## ss保存一步预报误差方差
ss <- numeric(n)
ss[1] <- gams[0+1] ## 预报Y_1的均方误差, 等于\gamma_0
ayw[1,1] <- gams[1+1] / gams[0+1] ## 用Y_1预报Y_2的系数
ss[2] <- ss[1] * (1 - ayw[1,1]^2) ## 用Y_1预报Y_2的均方误差
if(n>2) for(k in 1:(n-2)){
## 用Y_1, \dots, Y_{k+1}预报Y_{k+2}的系数
ayw[k+1,k+1] <- (gams[(k+1)+1] - sum(ayw[k,1:k] * gams[(k:1)+1])) / ss[k+1]
ayw[k+1,1:k] <- ayw[k, 1:k] - ayw[k+1,k+1]*ayw[k, k:1]
## 用Y_1, \dots, Y_{k+1}预报Y_{k+2}的均方误差
ss[k+2] <- ss[k+1] * (1 - ayw[k+1,k+1]^2)
}
list(coef.YW=ayw, sigmasq=ss)
}
a1 <- c(-1,-0.5)
ng<-3;n<-3 #估计的自协方差gamma(0),gamma(1),gamma(2)与原模型相同
gams <- arma.gamma(n=3, a=c(-1,-0.5), b=0, sigma=1)
res <- Levinson.coef(gams)
cat("利用Levinson递推估计的phi1和phi2是:",res$coef.YW[2,1],"和",res$coef.YW[2,2],"\n",sep="")
## 利用Levinson递推估计的phi1和phi2是:-1和-0.5
cat("利用Levinson递推估计的sigma_epsilon是:",res$sigmasq[3],sep="")
## 利用Levinson递推估计的sigma_epsilon是:1
#用样本来写
gamma<-function(h){
gammah<-0
n<-200
for(i in 1:(n-h)){
gammah<-gammah+(X[i]-mean(X))*(X[i+h]-mean(X))
}
return(gammah/n)
}
Gam<-c(gamma(0),gamma(1),gamma(2))
res1 <- Levinson.coef(Gam)
cat("利用Levinson递推估计的phi1和phi2是:",res1$coef.YW[2,1],"和",res1$coef.YW[2,2],"\n",sep="")
## 利用Levinson递推估计的phi1和phi2是:-0.898588和-0.4048481
cat("利用Levinson递推估计的sigma_epsilon是:",res1$sigmasq[3],sep="")
## 利用Levinson递推估计的sigma_epsilon是:1.01035
2.实现P70例题3.1(何书元)
set.seed(1234) #设立某种子数从而可以复现结果
library(polynom)
ar.gen <- function(n, a, sigma=1.0, by.roots=FALSE,
plot.it=FALSE, n0=1000,
x0=numeric(length(a))){ #生成随机数列函数
if(by.roots){
require(polynom)
cf <- Re(coef(poly.calc(a))) #poly.calc是利用输入的根计算首一多项式 比如根为1.09e^(ipi/3),得到1.1881 - 1.09*x + x^2,然后取系数再取实部赋给cf
cf <- cf / cf[1]
a <- -cf[-1] #得到了由根推出的phi(z)的系数
}
n2 <- n0 + n
p <- length(a)
eps <- rnorm(n2, 0, sigma)
x2 <- filter(eps, a, method="recursive", side=1, init=x0)
x <- x2[(n0+1):n2]
x <- ts(x)
attr(x, "model") <- "AR"
attr(x, "a") <- a #a分量是序列的系数
attr(x, "sigma") <- sigma
if(plot.it) plot(x)
x
}
#理论谱密度图形
## AR theoretical spectral density plot given AR coefficients
ar.true.spectrum <- function(a, ngrid=256, sigma=1, plot.it=TRUE,
title="AR True Spectral Density"){
p <- length(a)
freqs <- seq(from=0, to=pi, length=ngrid)
spec <- numeric(ngrid)
for(ii in seq(ngrid)){
spec[ii] <- sigma^2 / (2*pi) /
abs(1 - sum(complex(mod=a, arg=freqs[ii]*seq(p))))^2
}
if(plot.it){
plot(freqs, spec, type='l',
main=title,
xlab="frequency", ylab="spectral density",
axes=FALSE)
axis(2)
axis(1, at=(0:6)/6*pi,
labels=c(0, expression(pi/6),
expression(pi/3), expression(pi/2),
expression(2*pi/3), expression(5*pi/6), expression(pi)))
}
invisible(list(frequencies=freqs, spectrum=spec,
ar.coefficients=a, sigma=sigma))
}
set.seed(1234)
demo.ar.roots <- function(){ #利用根生成对应系数的AR序列,画出时序图,acf,pacf和谱密度图
n <- 1024
rtlis <- list(c(complex(mod=1.09, arg=pi/3*c(1,-1)),
complex(mod=1.098, arg=pi*2/3*c(1,-1))),
c(complex(mod=1.264, arg=pi/3*c(1,-1)),
complex(mod=1.273, arg=pi*2/3*c(1,-1))),
c(complex(mod=1.635, arg=pi/3*c(1,-1)),
complex(mod=1.647, arg=pi*2/3*c(1,-1))))
tits <- c("AR(4): 1.09exp(+-i pi/3), 1.098exp(+-i 2pi/3)",
"AR(4): 1.264exp(+-i pi/3), 1.273exp(+-i 2pi/3)",
"AR(4): 1.635exp(+-i pi/3), 1.647exp(+-i 2pi/3)")
tits <- c(
expression(paste("AR(4):",
list(1.09*e^{phantom(.) %+-% i*frac(pi,3)},
1.098*e^{phantom(.) %+-% i*frac(2*pi,3)}) )),
expression(paste("AR(4):",
list(1.264*e^{phantom(.) %+-% i*frac(pi,3)},
1.273*e^{phantom(.) %+-% i*frac(2*pi,3)}) )),
expression(paste("AR(4):",
list(1.635*e^{phantom(.) %+-% i*frac(pi,3)},
1.647*e^{phantom(.) %+-% i*frac(2*pi,3)}) )))
oldpar <- par(mfrow=c(3,1), mar=c(2,2,0,0),
mgp=c(2, 0.5, 0),oma=c(0,0,2,0))
on.exit(par(oldpar))
for(ii in seq(along=rtlis)){
rt = rtlis[[ii]]
y <- ar.gen(n, rt, sigma=1.0, by.roots=TRUE,plot.it=FALSE)
plot(window(y, 1, 60)) #时序图
acf(y) #acf图
#spectrum(y, taper=0.2)
ar.true.spectrum(attr(y, "a"), title="") #谱密度图
mtext(tits[ii], side=3, outer=TRUE)
}
}
demo.ar.roots()
#改编了一下上面的函数,使得谱密度图可以画在一张上,并画出Gammak
demo.ar.roots_self <- function(){
n <- 1024
ng <- 100
rtlis <- list(c(complex(mod=1.09, arg=pi/3*c(1,-1)),
complex(mod=1.098, arg=pi*2/3*c(1,-1))),
c(complex(mod=1.264, arg=pi/3*c(1,-1)),
complex(mod=1.273, arg=pi*2/3*c(1,-1))),
c(complex(mod=1.635, arg=pi/3*c(1,-1)),
complex(mod=1.647, arg=pi*2/3*c(1,-1))))
tits <- c("AR(4): 1.09exp(+-i pi/3), 1.098exp(+-i 2pi/3)",
"AR(4): 1.264exp(+-i pi/3), 1.273exp(+-i 2pi/3)",
"AR(4): 1.635exp(+-i pi/3), 1.647exp(+-i 2pi/3)")
tits <- c(
expression(paste("AR(4):",
list(1.09*e^{phantom(.) %+-% i*frac(pi,3)},
1.098*e^{phantom(.) %+-% i*frac(2*pi,3)}) )),
expression(paste("AR(4):",
list(1.264*e^{phantom(.) %+-% i*frac(pi,3)},
1.273*e^{phantom(.) %+-% i*frac(2*pi,3)}) )),
expression(paste("AR(4):",
list(1.635*e^{phantom(.) %+-% i*frac(pi,3)},
1.647*e^{phantom(.) %+-% i*frac(2*pi,3)}) )))
y1<-ar.gen(n, rtlis[[1]], sigma=1.0, by.roots=TRUE,plot.it=FALSE)
y2<-ar.gen(n, rtlis[[2]], sigma=1.0, by.roots=TRUE,plot.it=FALSE)
y3<-ar.gen(n, rtlis[[3]], sigma=1.0, by.roots=TRUE,plot.it=FALSE)
#只是借助了ar.gen函数求出序列系数,和生成随机序列无关
p1 <- length(attr(y1, "a"))
p2 <- length(attr(y2, "a"))
p3 <- length(attr(y3, "a"))
sigma=1
ngrid=256
freqs <- seq(from=0, to=pi, length=ngrid)
spec1 <- numeric(ngrid)
for(ii in seq(ngrid)){
spec1[ii] <- sigma^2 / (2*pi) /
abs(1 - sum(complex(mod=attr(y1, "a"), arg=freqs[ii]*seq(p1))))^2
}
spec2 <- numeric(ngrid)
for(ii in seq(ngrid)){
spec2[ii] <- sigma^2 / (2*pi) /
abs(1 - sum(complex(mod=attr(y2, "a"), arg=freqs[ii]*seq(p2))))^2
}
spec3 <- numeric(ngrid)
for(ii in seq(ngrid)){
spec3[ii] <- sigma^2 / (2*pi) /
abs(1 - sum(complex(mod=attr(y3, "a"), arg=freqs[ii]*seq(p3))))^2
}
plot(freqs, spec1, type='l',main="AR True Spectral Density",xlab="frequency", ylab="spectral density",axes=FALSE)
axis(2)
axis(1, at=(0:6)/6*pi,labels=c(0, expression(pi/6),expression(pi/3),
expression(pi/2),expression(2*pi/3),
expression(5*pi/6), expression(pi)))
lines(freqs,spec2,col="red",type='l')
lines(freqs,spec3,col="blue",type='l')
invisible(list(frequencies=freqs, spectrum=spec1,
ar.coefficients=attr(y1, "a"), sigma=sigma))
#ar.true.spectrum(attr(y, "a"), title="") #谱密度图
#画Gammak图
#计算得到f(x)=1.43238 - 2.61864 x + 3.59052 x^2 - 2.188 x^3 + x^4
a1<-c(2.61864/1.43238,-3.59052/1.43238,2.188/1.43238,-1/1.43238)
#polyroot(c(1, -c(2.61864/1.43238,-3.59052/1.43238,2.188/1.43238,-1/1.43238))) 验证根反推回的多项式系数是否正确
#abs(polyroot(c(1,-c(2.61864/1.43238,-3.59052/1.43238,2.188/1.43238,-1/1.43238))))
#计算得到f(x)=2.58911 - 4.08222 x + 4.8273 x^2 - 2.537 x^3 + x^4
a2<-c(4.08222/2.58911,-4.8273/2.58911,2.537/2.58911,-1/2.58911)
#polyroot(c(1, -c(4.08222/2.58911,-4.8273/2.58911,2.537/2.58911,-1/2.58911))) 验证根反推回的多项式系数是否正确
#abs(polyroot(c(1,-c(4.08222/2.58911,-4.8273/2.58911,2.537/2.58911,-1/2.58911))))
#计算得到f(x)=7.25141 - 8.83792 x + 8.07868 x^2 - 3.282 x^3 + x^4
a3<-c(8.83792/7.25141,-8.07868/7.25141,3.282/7.25141,-1/7.25141)
#polyroot(c(1, -c(8.83792/7.25141,-8.07868/7.25141,3.282/7.25141,-1/7.25141))) 验证根反推回的多项式系数是否正确
#abs(polyroot(c(1,-c(8.83792/7.25141,-8.07868/7.25141,3.282/7.25141,-1/7.25141))))
gam1<-numeric(ng)
gam1<-arma.gamma(ng, a1, b, sigma=1)
plot(seq(1:25),gam1[1:25],type="l",col="red",xlab = "第1个模型",ylab="")
abline(h=0,col="grey")
gam2<-numeric(ng)
gam2<-arma.gamma(ng, a2, b, sigma=1)
plot(seq(1:25),gam2[1:25],type="l",col="blue",xlab = "第2个模型",ylab="")
abline(h=0,col="grey")
gam3<-numeric(ng)
gam3<-arma.gamma(ng, a3, b, sigma=1)
plot(seq(1:25),gam3[1:25],type="l",col="green",xlab = "第3个模型",ylab="")
abline(h=0,col="grey")
}
demo.ar.roots_self()
第四幅图曲线按照峰值从高到低依次对应三个模型。
对第一个模型来说,复根都很靠近单位圆,所以谱密度会在\(\pi/3\)和\(2\pi/3\)附近有峰值,最上面的曲线在\(\lambda_1=1.052\)和\(\lambda_2=2.065\)有两个明显峰值,表明相应AR(4)序列自协方差函数有周期\(T_1=2\pi/{\lambda_1}=5.97\)和\(T_2=2\pi/{\lambda_2}=3.04\)的特性,\(\lambda_1\)和\(\lambda_2\)分别靠近1.047和2.094.
对第二个模型来说,中间的曲线在\(\lambda_1=1.07\)和\(\lambda_2=2.06\)有两个明显峰值,表明相应AR(4)序列自协方差函数有周期\(T_1=2\pi/{\lambda_1}=5.87\)和\(T_2=2\pi/{\lambda_2}=3.05\)的特性,\(\lambda_1\)和\(\lambda_2\)分别靠近1.047和2.094.
对第三个模型来说,对应最下面的曲线在\(\lambda_1=1.217\)和\(\lambda_2=1.926\)有两个不明显峰值,表明相应AR(4)序列自协方差函数有周期\(T_1=2\pi/{\lambda_1}=5.16\)和\(T_2=2\pi/{\lambda_2}=3.26\)的特性.
最后三张图是\(\gamma_k\)曲线,第一张对应第一个模型,可以看出\(|\gamma_k|\)递减的比较慢,体现出有周期\(T_1,T_2\)的特性,在\(k=0,6,...,24\)处有大峰值,在\(k=0,3,6,...,24\)有峰值。第二张对应第二个模型,可以看出\(|\gamma_k|\)递减的比第一个模型的快,但还是体现出有周期\(T_1,T_2\)的特性。第三张对应第三个模型,可以看出\(|\gamma_k|\)递减的很快,周期特性不再明显。
模拟一AR(2)模型,\(X_t=0.8X_{t-1}-0.1X_{t-2}+\epsilon_t,\epsilon_t
i.i.d~N(0,1)\) ,
1),计算其谱密度为\(f(x)=\frac{1}{2\pi|(1-0.8e^{iw}+0.1e^{2iw})|^2}\),画出其理论谱密度图。
2).
根据此模型模拟500个观测值,先做白噪声检验利用acf和pacf定出模型AR(2),根据此序列利用Yule-Walker方程估计AR(2)模型的参数值,并根据估计值算出谱密度估计,画出图形,与真实的相比较(画在同一幅图里)。
#法一
library(astsa)
arma.spec(ar=c(0.8,-0.1),main="Autoregression")
#法2
ar.true.spectrum(c(0.8,-0.1))
#法三 直接计算(实际上与上面的函数相同)
ft <- function(lambda,sigma=1,phi){ #输入AR(2)序列系数
phi<-abs(phi)
sigma^2 / (2*pi) / Mod(1 - complex(mod=phi[1], arg=-lambda)+ complex(mod=phi[2], arg=-2*lambda))^2
}
lams <- seq(0, pi, length=201)
spec0 <- ft(lams,phi=c(0.8,-0.1))
plot(lams, spec0, lwd=3, type="l",main="AR(2)序列理论谱密度图",xlab="frequency", ylab="")
#模拟观测值
n<-500;n0<-50 #舍去前50数据
a<-0.8
b<--0.1
#eps <- rnorm(n+n0,0,1)
#X=AR2(eps,n,n0)
X <- arima.sim(model=list(ar=c(a,b)), n=500)
print(Box.test(X,type = "Ljung-Box", lag = 6))
##
## Box-Ljung test
##
## data: X
## X-squared = 407.76, df = 6, p-value < 2.2e-16
print(Box.test(X,type = "Ljung-Box", lag = 12))
##
## Box-Ljung test
##
## data: X
## X-squared = 431.95, df = 12, p-value < 2.2e-16
白噪声检验显示序列值彼此之间蕴涵着相关关系,为非白噪声序列。
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(fUnitRoots)
## 载入需要的程辑包:timeDate
## 载入需要的程辑包:timeSeries
## 载入需要的程辑包:fBasics
##
## 载入程辑包:'fBasics'
## The following object is masked from 'package:astsa':
##
## nyse
library(forecast)
##
## 载入程辑包:'forecast'
## The following object is masked from 'package:astsa':
##
## gas
library(zoo)
##
## 载入程辑包:'zoo'
## The following object is masked from 'package:timeSeries':
##
## time<-
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
plot(X, type="l",col="red",xlab="time",ylab="",main=expression(X[t]))
acf(X)
pacf(X)
adf.test(X)
##
## Augmented Dickey-Fuller Test
##
## data: X
## Dickey-Fuller = -7.8404, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
auto.arima(X)
## Series: X
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 0.8336 -0.1478
## s.e. 0.0442 0.0443
##
## sigma^2 = 1.038: log likelihood = -718.23
## AIC=1442.47 AICc=1442.52 BIC=1455.11
从 ACF
图中可以看到Lag=4之后,ACF大致控制在2倍标准差之内,可以判断该序列具有短期相关性,即认为该序列符合平稳序列的特征。且从PACF图可以看出AR序列“2阶截尾”,说明序列符合平稳性。此外利用单位根检验发现序列也是平稳的。
从 ACF 图和 PACF
图可以看出,自相关系数“拖尾”,偏自相关系数“2阶截尾”,因此初步确定拟合的模型为AR(2)模型。采用auto.arima函数得到了同样的结论。
#利用Yule-Walker方程估计
fit.yw <- ar(X, aic=FALSE, order.max=2,method="yule-walker")
cat("利用Yule-Walker方程估计的phi1和phi2是:",fit.yw$ar[1],"和",fit.yw$ar[2],"\n",sep="")
## 利用Yule-Walker方程估计的phi1和phi2是:0.8313212和-0.1479688
cat("利用Yule-Walker方程估计的sigma_epsilon是:",fit.yw$var.pred,sep="")
## 利用Yule-Walker方程估计的sigma_epsilon是:1.045438
根据Y-W方程得到的估计参数,算出谱密度估计为\(f(x)=\frac{1.045438}{2\pi|(1-0.8313212e^{iw}+0.1479688e^{2iw})|^2}\)(因为arima的随机性,实际上生成html的时候每次得到的估计值不一定一样,不过道理相同,用上面得到的估计方差和参数带入上式即可)
yw.spec<-function(phi1,X,fit.yw){ #输入真实的系数,输入序列(作YW估计)
lams <- seq(0, pi, length=201)
spec0 <- ft(lams,sigma=1,phi=phi1)
plot(lams, spec0,lwd=3,type="l",lty=1,main="AR(2)序列理论谱密度图",xlab="frequency", ylab="")
spec1 <- ar.true.spectrum(fit.yw$ar,ngrid=length(lams),sigma=sqrt(fit.yw$var.pred),plot.it=FALSE)$spectrum #实际上相当于用ft函数算了一遍谱密度值
lines(lams, spec1, col="green", lwd=2, lty=2)
legend("topright", lwd=c(3,2), lty=c(1,2),col=c("black", "green"),
legend=c("True","Y-W得到的谱密度"))
}
yw.spec(phi=c(0.8,-0.1),X,fit.yw)
时间数据建模。考虑美国实际GDP(1941-2012年)数据,q-gdpmc1.txt。
1)画出时序图,考虑做对数差分GDP(GDP增长率)Yt
2) 画出Yt的时序图,做白噪声检验,并做均值是否为零的检验(t检验)。
3)画出Yt的acf和pacf图,确定AR模型,定阶并做估计
data<-read.table("D:/atas/atas_exp3/q-gdpmc1.txt",header=TRUE)
x = ts(data$gdp, start=c(1947,1),frequency = 4)
plot(x,main="美国实际GDP")
#做对数差分,画时序图
Y=diff(log(x))
plot(Y,main="GDP增长率")
#做白噪声检验
print(Box.test(Y,type = "Ljung-Box", lag = 6))
##
## Box-Ljung test
##
## data: Y
## X-squared = 55.586, df = 6, p-value = 3.53e-10
print(Box.test(Y,type = "Ljung-Box", lag = 12))
##
## Box-Ljung test
##
## data: Y
## X-squared = 64.259, df = 12, p-value = 3.737e-09
#做均值是否为零的检验(t检验)
t.test(Y,mu=0)
##
## One Sample t-test
##
## data: Y
## t = 12.786, df = 262, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.006573382 0.008966545
## sample estimates:
## mean of x
## 0.007769964
白噪声检验显示序列值彼此之间蕴涵着相关关系,为非白噪声序列。
t检验说明均值为0的假设不成立。
library(zoo)
library(TSA)
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## 载入程辑包:'TSA'
## The following objects are masked from 'package:timeDate':
##
## kurtosis, skewness
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(forecast)
acf(Y)
pacf(Y)
#自动拟合
auto.arima(Y)
## Series: Y
## ARIMA(3,0,1)(2,0,1)[4] with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 sar1 sar2 sma1 mean
## -0.2310 0.3256 -0.0171 0.5677 -0.6868 -0.0405 0.5850 0.0077
## s.e. 0.3059 0.1109 0.0829 0.2987 0.4448 0.0852 0.4386 0.0009
##
## sigma^2 = 8.391e-05: log likelihood = 864.99
## AIC=-1711.99 AICc=-1711.27 BIC=-1679.84
#BIC定阶
res=armasubsets(y=Y,nar=15,nma=15,y.name='test',ar.method='ols')
## Reordering variables and trying again:
plot(res)
#AIC定阶
ar(Y,method = "mle")
##
## Call:
## ar(x = Y, method = "mle")
##
## Coefficients:
## 1 2 3
## 0.3475 0.1283 -0.1121
##
## Order selected 3 sigma^2 estimated as 8.17e-05
从 ACF
图中可以看到Lag=3之后,ACF大致控制在2倍标准差之内,可以判断该序列具有短期相关性,即认为该序列符合平稳序列的特征。且从PACF图可以看出AR序列“3阶截尾”,说明序列符合平稳性。此外利用单位根检验发现序列也是平稳的。
从 ACF 图和 PACF
图可以看出,自相关系数“拖尾”,偏自相关系数“3阶截尾”,因此初步确定拟合的模型为AR(3)模型。
采用auto.arima函数得到拟合为ARIMA(3,0,1)(2,0,1)[4]模型较合适,考虑AR模型的话可以选取AR(3).
采用TSA包的armasubsets函数发现选择AR(1)模型BIC值最小。
采用ar函数利用mle方法发现AR(3)模型AIC值最小。
综上结合题意,选择AR(3)模型,即定阶数为3。
#参数的极大似然估计
Y.fit = arima(Y, order=c(3,0,0), method = "ML")
Y.fit
##
## Call:
## arima(x = Y, order = c(3, 0, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 0.3476 0.1281 -0.1121 0.0077
## s.e. 0.0612 0.0644 0.0613 0.0009
##
## sigma^2 estimated as 8.17e-05: log likelihood = 864.45, aic = -1720.9
根据参数估计结果,写出模型的表达式:\(X_t-0.0077=\frac{\epsilon_t}{1-0.3476B-0.1281B^2+0.1121B^3},Var(\epsilon_t)=8.17*10^{-5}\)
# 对残差序列 Y.fit$residuals 进行白噪声检验
for (i in 1:2) {
print(Box.test(Y.fit$residuals, lag = 6*i))
}
##
## Box-Pierce test
##
## data: Y.fit$residuals
## X-squared = 3.5054, df = 6, p-value = 0.7433
##
##
## Box-Pierce test
##
## data: Y.fit$residuals
## X-squared = 12.369, df = 12, p-value = 0.4165
Y.fit
##
## Call:
## arima(x = Y, order = c(3, 0, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 0.3476 0.1281 -0.1121 0.0077
## s.e. 0.0612 0.0644 0.0613 0.0009
##
## sigma^2 estimated as 8.17e-05: log likelihood = 864.45, aic = -1720.9
# ar1 系数的显著性检验
t1 = 0.3476/0.0612
# 利用 P 值函数 pt()计算P值,其中自由度df为时间序列样本数减去未知参数个数,本案例中为 263-4=259
p1 = 2*pt(t1, df=259, lower.tail = F)
# ar2 系数的显著性检验
t2 = 0.1281/0.0644
p2 = 2*pt(t2, df=259, lower.tail = F)
# ar3 系数的显著性检验
t3 = -0.1121/0.0613
p3 = 2*pt(t3, df=259, lower.tail = T)
# 常数的显著性检验
t0 = 0.0077/0.0009
p0 = 2*pt(t0, df=259, lower.tail = F)
p_value = data.frame(p1, p2,p3, p0)
row.names(p_value) = "P 值"
p_value
## p1 p2 p3 p0
## P 值 3.616389e-08 0.04773877 0.06859282 1.044972e-15
由于各阶延迟下的LB统计量的P值都显著大于\(\alpha=0.05\),接受残差序列为白噪声序列的原假设,即该拟合模型在置信水平 95% 下显著有效。
检验结果显示,三个系数的P值基本都小于\(\alpha=0.05\)(除p3略大于0.05),拒绝系数等于0的原假设,因此在置信水平 95% 下均显著非零。
library(forecast)
Y.fit = stats::arima(Y, order=c(3,0,0), method = "ML")
#先调用forecast后调用TSA包会出现The following objects are masked from ‘package:stats’: acf, arima。而forecast包适配的是stat里的arima,不加的话Y.fit$series="x"
#先调用TSA后调用forecast包也可以解决该问题
Y.fore = forecast(Y.fit, h=20)
#Y.fore
plot(Y)
plot(Y.fore,data=Y)
#指数平滑法预测
Y.Holt <- HoltWinters(Y)
Yforecast.Holt<-forecast(Y.Holt,h=20)
plot(Yforecast.Holt)