第一题

1.模拟一AR(2)模型如下,n=200(模拟250,舍去50) \(x_t=-x_{t-1}-0.5x_{t-2}+\epsilon_t,\epsilon_t i.i.d~N(0,1)\) 利用此模拟序列,建立模型(画出时序图,acf和pacf)。 利用Yule-Walker方程估计\(\phi_1,\phi_2,{\sigma_\epsilon}^2\)。 利用levinson算法写程序估计以上参数。

模拟序列,建立模型(画出时序图,acf和pacf)

#模拟序列,建立模型(画出时序图,acf和pacf)
n<-200;n0<-50  #舍去前50数据
a<--1
b<--0.5
set.seed(1234)    #方便复现结果
eps <- rnorm(n+n0,0,1)
AR2 <- function(eps,n,n0) {
  x=rep(0,n)
  x1=rep(0,n+n0)
  x1[1:2]=eps[1:2]
  for (t in 3:length(eps)) {
    x1[t]=a*x1[t-1]+b*x1[t-2]+eps[t]
  }
  x=x1[(n0+1):(n+n0)]
  x<-ts(x)
  return(x)
}
X=AR2(eps,n,n0)
#x <- arima.sim(model=list(ar=c(a,b)), n=200)     
plot(X, type="l",col="red",xlab="time",ylab="",main=expression(X[t]))

acf(X)

pacf(X)

利用Yule-Walker方程估计

#利用Yule-Walker方程估计
fit.yw <- ar(X, aic=FALSE, order.max=2,method="yule-walker") #aic:If TRUE(默认值) then the AIC is used to choose the order of the autoregressive model. If FALSE, the model of order order.max is fitted.
cat("利用Yule-Walker方程估计的phi1和phi2是:",fit.yw$ar[1],"和",fit.yw$ar[2],"\n",sep="")
## 利用Yule-Walker方程估计的phi1和phi2是:-0.898588和-0.4048481
cat("利用Yule-Walker方程估计的sigma_epsilon是:",fit.yw$var.pred,sep="")
## 利用Yule-Walker方程估计的sigma_epsilon是:1.025736

利用levinson算法估计

#利用levinson算法估计
arma.Wold <- function(n, a, b=numeric(0)){
  p <- length(a)
  q <- length(b)
  arev <- rev(a)   #逆序
  psi <- numeric(n)
  psi[1] <- 1
  for(j in seq(n-1)){
    if(j <= q) bj=b[j]
    else bj=0
    psis <- psi[max(1, j+1-p):j]
    np <- length(psis)
    if(np < p) psis <- c(rep(0,p-np), psis)
    psi[j+1] <- bj + sum(arev * psis)
  }
  
  psi
}
## Calculate theoretical autocovariance function of ARMA model using Wold expansion
arma.gamma.by.Wold <- function(n, a, b=numeric(0), sigma=1){
  nn <- n + 100
  psi <- arma.Wold(nn, a, b)
  #print(psi)
  gam <- numeric(n)
  for(ii in seq(0, n-1)){
    gam[ii+1] <- sum(psi[1:(nn-ii)] * psi[(ii+1):nn])
  }
  gam <- (sigma^2) * gam
  gam
}
arma.gamma <- arma.gamma.by.Wold

### 对平稳列,已知自协方差列时用Levinson递推计算逐个一步预报系数(Y-W系数)和一步预测误差方差
### 输入: gams[1:n] 为\gamma_k, k=0,1,\dots,n-1
### 输出:预报Y_1, Y_2, \dots, Y_{n}所需的系数和均方误差。
###   coef.YW -- (n-1)*(n-1)矩阵,第k行的1:k元素为
###         a_{k,1}, a_{k,2}, \dots, a_{k,k},
###         用来预报Y_{k+1}:
###         L(Y_{k+1} | Y_1, Y_2, \dots, Y_k)= a_{k,1} Y_{k} + a_{k,2} Y_{k-1} + \dots + a_{k,k} Y_1       k=1,2,\dots,n-1
###       L(Y_1| ) = 0
###   sigmasq --n向量,预报Y_1,Y_2,\dots,Y_n的一步预报均方误差
###       其1号元素是预报Y_1的均方误差,2号元素是预报Y_2的均方误差,..., n号元素是预报Y_n的均方误差。

#估计gamma(h),因为n较大,采用书7.2.1公式逼近
gamma<-function(h){
  gammah<-0
  n<-200
  for(i in 1:(n-h)){
      gammah<-gammah+(X[i]-mean(X))*(X[i+h]-mean(X))
  }
  return(gammah/n)
}

Levinson.coef <- function(gams){
  n <- length(gams)

  ## ayw保存Y-W系数a[i,j], i=1,2,\dots,n-1, j=1,2,\dots,i
  ayw <- matrix(0, n-1, n-1)
  ## ss保存一步预报误差方差
  ss <- numeric(n)

  ss[1] <- gams[0+1] ## 预报Y_1的均方误差, 等于\gamma_0
  ayw[1,1] <- gams[1+1] / gams[0+1] ## 用Y_1预报Y_2的系数
  ss[2] <- ss[1] * (1 - ayw[1,1]^2) ## 用Y_1预报Y_2的均方误差
  if(n>2) for(k in 1:(n-2)){
    ## 用Y_1, \dots, Y_{k+1}预报Y_{k+2}的系数
    ayw[k+1,k+1] <- (gams[(k+1)+1] - sum(ayw[k,1:k] * gams[(k:1)+1])) / ss[k+1]
    ayw[k+1,1:k] <- ayw[k, 1:k] - ayw[k+1,k+1]*ayw[k, k:1]
    ## 用Y_1, \dots, Y_{k+1}预报Y_{k+2}的均方误差
    ss[k+2] <- ss[k+1] * (1 - ayw[k+1,k+1]^2)
  }

  list(coef.YW=ayw, sigmasq=ss)
}

a1 <- c(-1,-0.5)
ng<-3;n<-3   #估计的自协方差gamma(0),gamma(1),gamma(2)与原模型相同
gams <- arma.gamma(n=3, a=c(-1,-0.5), b=0, sigma=1)
res <- Levinson.coef(gams)
cat("利用Levinson递推估计的phi1和phi2是:",res$coef.YW[2,1],"和",res$coef.YW[2,2],"\n",sep="")
## 利用Levinson递推估计的phi1和phi2是:-1和-0.5
cat("利用Levinson递推估计的sigma_epsilon是:",res$sigmasq[3],sep="")
## 利用Levinson递推估计的sigma_epsilon是:1
#用样本来写
gamma<-function(h){
  gammah<-0
  n<-200
  for(i in 1:(n-h)){
    gammah<-gammah+(X[i]-mean(X))*(X[i+h]-mean(X))
  }
  return(gammah/n)
}
Gam<-c(gamma(0),gamma(1),gamma(2))
res1 <- Levinson.coef(Gam)
cat("利用Levinson递推估计的phi1和phi2是:",res1$coef.YW[2,1],"和",res1$coef.YW[2,2],"\n",sep="")
## 利用Levinson递推估计的phi1和phi2是:-0.898588和-0.4048481
cat("利用Levinson递推估计的sigma_epsilon是:",res1$sigmasq[3],sep="")
## 利用Levinson递推估计的sigma_epsilon是:1.01035

第二题

2.实现P70例题3.1(何书元)

set.seed(1234)   #设立某种子数从而可以复现结果
library(polynom)
ar.gen <- function(n, a, sigma=1.0, by.roots=FALSE,
                   plot.it=FALSE, n0=1000,
                   x0=numeric(length(a))){   #生成随机数列函数
  if(by.roots){
    require(polynom)
    cf <- Re(coef(poly.calc(a)))   #poly.calc是利用输入的根计算首一多项式 比如根为1.09e^(ipi/3),得到1.1881 - 1.09*x + x^2,然后取系数再取实部赋给cf 
    cf <- cf / cf[1]
    a <- -cf[-1]   #得到了由根推出的phi(z)的系数
  }
  n2 <- n0 + n
  p <- length(a)
  eps <- rnorm(n2, 0, sigma)
  x2 <- filter(eps, a, method="recursive", side=1, init=x0)
  x <- x2[(n0+1):n2]
  x <- ts(x)
  attr(x, "model") <- "AR"
  attr(x, "a") <- a   #a分量是序列的系数
  attr(x, "sigma") <- sigma
  if(plot.it) plot(x)
  x
}

#理论谱密度图形
## AR theoretical spectral density plot given AR coefficients
ar.true.spectrum <- function(a, ngrid=256, sigma=1, plot.it=TRUE, 
                             title="AR True Spectral Density"){
  p <- length(a)
  freqs <- seq(from=0, to=pi, length=ngrid)
  spec <- numeric(ngrid)
  for(ii in seq(ngrid)){
    spec[ii] <- sigma^2 / (2*pi) / 
      abs(1 - sum(complex(mod=a, arg=freqs[ii]*seq(p))))^2
  }
  if(plot.it){
    plot(freqs, spec, type='l',
         main=title,
         xlab="frequency", ylab="spectral density",
         axes=FALSE)
    axis(2)
    axis(1, at=(0:6)/6*pi,
         labels=c(0, expression(pi/6),
           expression(pi/3), expression(pi/2),
           expression(2*pi/3), expression(5*pi/6), expression(pi)))
  }
  invisible(list(frequencies=freqs, spectrum=spec,
            ar.coefficients=a, sigma=sigma))
}
set.seed(1234)
demo.ar.roots <- function(){    #利用根生成对应系数的AR序列,画出时序图,acf,pacf和谱密度图
  n <- 1024
  rtlis <- list(c(complex(mod=1.09, arg=pi/3*c(1,-1)),
                  complex(mod=1.098, arg=pi*2/3*c(1,-1))),
                c(complex(mod=1.264, arg=pi/3*c(1,-1)),
                  complex(mod=1.273, arg=pi*2/3*c(1,-1))),
                c(complex(mod=1.635, arg=pi/3*c(1,-1)),
                  complex(mod=1.647, arg=pi*2/3*c(1,-1))))
  tits <- c("AR(4): 1.09exp(+-i pi/3), 1.098exp(+-i 2pi/3)",
            "AR(4): 1.264exp(+-i pi/3), 1.273exp(+-i 2pi/3)",
            "AR(4): 1.635exp(+-i pi/3), 1.647exp(+-i 2pi/3)")
  tits <- c(
    expression(paste("AR(4):",
              list(1.09*e^{phantom(.) %+-% i*frac(pi,3)},
                  1.098*e^{phantom(.) %+-% i*frac(2*pi,3)}) )),
    expression(paste("AR(4):",
              list(1.264*e^{phantom(.) %+-% i*frac(pi,3)},
                  1.273*e^{phantom(.) %+-% i*frac(2*pi,3)}) )),
    expression(paste("AR(4):",
              list(1.635*e^{phantom(.) %+-% i*frac(pi,3)},
                  1.647*e^{phantom(.) %+-% i*frac(2*pi,3)}) )))
  oldpar <- par(mfrow=c(3,1), mar=c(2,2,0,0),
                mgp=c(2, 0.5, 0),oma=c(0,0,2,0))
  on.exit(par(oldpar))
  for(ii in seq(along=rtlis)){
    rt = rtlis[[ii]]
    y <- ar.gen(n, rt, sigma=1.0, by.roots=TRUE,plot.it=FALSE)
    plot(window(y, 1, 60))   #时序图
    acf(y)                #acf图
    #spectrum(y, taper=0.2)
    ar.true.spectrum(attr(y, "a"), title="")   #谱密度图
    mtext(tits[ii], side=3, outer=TRUE)
  }
}
demo.ar.roots()

#改编了一下上面的函数,使得谱密度图可以画在一张上,并画出Gammak
demo.ar.roots_self <- function(){
  n <- 1024
  ng <- 100
  rtlis <- list(c(complex(mod=1.09, arg=pi/3*c(1,-1)),
                  complex(mod=1.098, arg=pi*2/3*c(1,-1))),
                c(complex(mod=1.264, arg=pi/3*c(1,-1)),
                  complex(mod=1.273, arg=pi*2/3*c(1,-1))),
                c(complex(mod=1.635, arg=pi/3*c(1,-1)),
                  complex(mod=1.647, arg=pi*2/3*c(1,-1))))
  tits <- c("AR(4): 1.09exp(+-i pi/3), 1.098exp(+-i 2pi/3)",
            "AR(4): 1.264exp(+-i pi/3), 1.273exp(+-i 2pi/3)",
            "AR(4): 1.635exp(+-i pi/3), 1.647exp(+-i 2pi/3)")
  tits <- c(
    expression(paste("AR(4):",
              list(1.09*e^{phantom(.) %+-% i*frac(pi,3)},
                  1.098*e^{phantom(.) %+-% i*frac(2*pi,3)}) )),
    expression(paste("AR(4):",
              list(1.264*e^{phantom(.) %+-% i*frac(pi,3)},
                  1.273*e^{phantom(.) %+-% i*frac(2*pi,3)}) )),
    expression(paste("AR(4):",
              list(1.635*e^{phantom(.) %+-% i*frac(pi,3)},
                  1.647*e^{phantom(.) %+-% i*frac(2*pi,3)}) )))
  
  y1<-ar.gen(n, rtlis[[1]], sigma=1.0, by.roots=TRUE,plot.it=FALSE)
  y2<-ar.gen(n, rtlis[[2]], sigma=1.0, by.roots=TRUE,plot.it=FALSE)
  y3<-ar.gen(n, rtlis[[3]], sigma=1.0, by.roots=TRUE,plot.it=FALSE)
  #只是借助了ar.gen函数求出序列系数,和生成随机序列无关
  p1 <- length(attr(y1, "a"))    
  p2 <- length(attr(y2, "a"))
  p3 <- length(attr(y3, "a"))
  sigma=1
  ngrid=256
  freqs <- seq(from=0, to=pi, length=ngrid)
  spec1 <- numeric(ngrid)
  for(ii in seq(ngrid)){
    spec1[ii] <- sigma^2 / (2*pi) / 
      abs(1 - sum(complex(mod=attr(y1, "a"), arg=freqs[ii]*seq(p1))))^2
  }
  spec2 <- numeric(ngrid)
  for(ii in seq(ngrid)){
    spec2[ii] <- sigma^2 / (2*pi) / 
      abs(1 - sum(complex(mod=attr(y2, "a"), arg=freqs[ii]*seq(p2))))^2
  }
  spec3 <- numeric(ngrid)
  for(ii in seq(ngrid)){
    spec3[ii] <- sigma^2 / (2*pi) / 
      abs(1 - sum(complex(mod=attr(y3, "a"), arg=freqs[ii]*seq(p3))))^2
  }
  plot(freqs, spec1, type='l',main="AR True Spectral Density",xlab="frequency", ylab="spectral density",axes=FALSE)
  axis(2)
  axis(1, at=(0:6)/6*pi,labels=c(0, expression(pi/6),expression(pi/3), 
                                 expression(pi/2),expression(2*pi/3), 
                                 expression(5*pi/6), expression(pi)))
  lines(freqs,spec2,col="red",type='l')
  lines(freqs,spec3,col="blue",type='l')
  invisible(list(frequencies=freqs, spectrum=spec1,
            ar.coefficients=attr(y1, "a"), sigma=sigma))
  #ar.true.spectrum(attr(y, "a"), title="")   #谱密度图
  
  #画Gammak图
  #计算得到f(x)=1.43238 - 2.61864 x + 3.59052 x^2 - 2.188 x^3 + x^4
  a1<-c(2.61864/1.43238,-3.59052/1.43238,2.188/1.43238,-1/1.43238)
  #polyroot(c(1, -c(2.61864/1.43238,-3.59052/1.43238,2.188/1.43238,-1/1.43238)))   验证根反推回的多项式系数是否正确
  #abs(polyroot(c(1,-c(2.61864/1.43238,-3.59052/1.43238,2.188/1.43238,-1/1.43238))))
  
  #计算得到f(x)=2.58911 - 4.08222 x + 4.8273 x^2 - 2.537 x^3 + x^4
  a2<-c(4.08222/2.58911,-4.8273/2.58911,2.537/2.58911,-1/2.58911)
  #polyroot(c(1, -c(4.08222/2.58911,-4.8273/2.58911,2.537/2.58911,-1/2.58911)))   验证根反推回的多项式系数是否正确
  #abs(polyroot(c(1,-c(4.08222/2.58911,-4.8273/2.58911,2.537/2.58911,-1/2.58911))))
  
  #计算得到f(x)=7.25141 - 8.83792 x + 8.07868 x^2 - 3.282 x^3 + x^4
  a3<-c(8.83792/7.25141,-8.07868/7.25141,3.282/7.25141,-1/7.25141)
  #polyroot(c(1, -c(8.83792/7.25141,-8.07868/7.25141,3.282/7.25141,-1/7.25141)))   验证根反推回的多项式系数是否正确
  #abs(polyroot(c(1,-c(8.83792/7.25141,-8.07868/7.25141,3.282/7.25141,-1/7.25141))))
  
  gam1<-numeric(ng)
  gam1<-arma.gamma(ng, a1, b, sigma=1)
  plot(seq(1:25),gam1[1:25],type="l",col="red",xlab = "第1个模型",ylab="")
  abline(h=0,col="grey")
  
  gam2<-numeric(ng)
  gam2<-arma.gamma(ng, a2, b, sigma=1)
  plot(seq(1:25),gam2[1:25],type="l",col="blue",xlab = "第2个模型",ylab="")
  abline(h=0,col="grey")
  
  gam3<-numeric(ng)
  gam3<-arma.gamma(ng, a3, b, sigma=1)
  plot(seq(1:25),gam3[1:25],type="l",col="green",xlab = "第3个模型",ylab="")
  abline(h=0,col="grey")
}
demo.ar.roots_self()

第四幅图曲线按照峰值从高到低依次对应三个模型。
对第一个模型来说,复根都很靠近单位圆,所以谱密度会在\(\pi/3\)\(2\pi/3\)附近有峰值,最上面的曲线在\(\lambda_1=1.052\)\(\lambda_2=2.065\)有两个明显峰值,表明相应AR(4)序列自协方差函数有周期\(T_1=2\pi/{\lambda_1}=5.97\)\(T_2=2\pi/{\lambda_2}=3.04\)的特性,\(\lambda_1\)\(\lambda_2\)分别靠近1.047和2.094.
对第二个模型来说,中间的曲线在\(\lambda_1=1.07\)\(\lambda_2=2.06\)有两个明显峰值,表明相应AR(4)序列自协方差函数有周期\(T_1=2\pi/{\lambda_1}=5.87\)\(T_2=2\pi/{\lambda_2}=3.05\)的特性,\(\lambda_1\)\(\lambda_2\)分别靠近1.047和2.094.
对第三个模型来说,对应最下面的曲线在\(\lambda_1=1.217\)\(\lambda_2=1.926\)有两个不明显峰值,表明相应AR(4)序列自协方差函数有周期\(T_1=2\pi/{\lambda_1}=5.16\)\(T_2=2\pi/{\lambda_2}=3.26\)的特性.
最后三张图是\(\gamma_k\)曲线,第一张对应第一个模型,可以看出\(|\gamma_k|\)递减的比较慢,体现出有周期\(T_1,T_2\)的特性,在\(k=0,6,...,24\)处有大峰值,在\(k=0,3,6,...,24\)有峰值。第二张对应第二个模型,可以看出\(|\gamma_k|\)递减的比第一个模型的快,但还是体现出有周期\(T_1,T_2\)的特性。第三张对应第三个模型,可以看出\(|\gamma_k|\)递减的很快,周期特性不再明显。

第三题

模拟一AR(2)模型,\(X_t=0.8X_{t-1}-0.1X_{t-2}+\epsilon_t,\epsilon_t i.i.d~N(0,1)\)
1),计算其谱密度为\(f(x)=\frac{1}{2\pi|(1-0.8e^{iw}+0.1e^{2iw})|^2}\),画出其理论谱密度图。
2). 根据此模型模拟500个观测值,先做白噪声检验利用acf和pacf定出模型AR(2),根据此序列利用Yule-Walker方程估计AR(2)模型的参数值,并根据估计值算出谱密度估计,画出图形,与真实的相比较(画在同一幅图里)。

画理论谱密度图

#法一
library(astsa)
arma.spec(ar=c(0.8,-0.1),main="Autoregression")

#法2 
ar.true.spectrum(c(0.8,-0.1))

#法三 直接计算(实际上与上面的函数相同)
ft <- function(lambda,sigma=1,phi){     #输入AR(2)序列系数
  phi<-abs(phi)
  sigma^2 / (2*pi) / Mod(1 - complex(mod=phi[1], arg=-lambda)+ complex(mod=phi[2], arg=-2*lambda))^2
}
lams <- seq(0, pi, length=201)
spec0 <- ft(lams,phi=c(0.8,-0.1))
plot(lams, spec0, lwd=3, type="l",main="AR(2)序列理论谱密度图",xlab="frequency", ylab="")

模拟序列,模型定阶

#模拟观测值
n<-500;n0<-50  #舍去前50数据
a<-0.8
b<--0.1
#eps <- rnorm(n+n0,0,1)
#X=AR2(eps,n,n0)
X <- arima.sim(model=list(ar=c(a,b)), n=500)    
print(Box.test(X,type = "Ljung-Box", lag = 6))
## 
##  Box-Ljung test
## 
## data:  X
## X-squared = 407.76, df = 6, p-value < 2.2e-16
print(Box.test(X,type = "Ljung-Box", lag = 12))
## 
##  Box-Ljung test
## 
## data:  X
## X-squared = 431.95, df = 12, p-value < 2.2e-16

白噪声检验显示序列值彼此之间蕴涵着相关关系,为非白噪声序列。

library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(fUnitRoots)
## 载入需要的程辑包:timeDate
## 载入需要的程辑包:timeSeries
## 载入需要的程辑包:fBasics
## 
## 载入程辑包:'fBasics'
## The following object is masked from 'package:astsa':
## 
##     nyse
library(forecast)
## 
## 载入程辑包:'forecast'
## The following object is masked from 'package:astsa':
## 
##     gas
library(zoo)
## 
## 载入程辑包:'zoo'
## The following object is masked from 'package:timeSeries':
## 
##     time<-
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
plot(X, type="l",col="red",xlab="time",ylab="",main=expression(X[t]))

acf(X)

pacf(X)

adf.test(X)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  X
## Dickey-Fuller = -7.8404, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
auto.arima(X)
## Series: X 
## ARIMA(2,0,0) with zero mean 
## 
## Coefficients:
##          ar1      ar2
##       0.8336  -0.1478
## s.e.  0.0442   0.0443
## 
## sigma^2 = 1.038:  log likelihood = -718.23
## AIC=1442.47   AICc=1442.52   BIC=1455.11

从 ACF 图中可以看到Lag=4之后,ACF大致控制在2倍标准差之内,可以判断该序列具有短期相关性,即认为该序列符合平稳序列的特征。且从PACF图可以看出AR序列“2阶截尾”,说明序列符合平稳性。此外利用单位根检验发现序列也是平稳的。
从 ACF 图和 PACF 图可以看出,自相关系数“拖尾”,偏自相关系数“2阶截尾”,因此初步确定拟合的模型为AR(2)模型。采用auto.arima函数得到了同样的结论。

利用Y-W方程估计参数值

#利用Yule-Walker方程估计
fit.yw <- ar(X, aic=FALSE, order.max=2,method="yule-walker") 
cat("利用Yule-Walker方程估计的phi1和phi2是:",fit.yw$ar[1],"和",fit.yw$ar[2],"\n",sep="")
## 利用Yule-Walker方程估计的phi1和phi2是:0.8313212和-0.1479688
cat("利用Yule-Walker方程估计的sigma_epsilon是:",fit.yw$var.pred,sep="")
## 利用Yule-Walker方程估计的sigma_epsilon是:1.045438

根据估计值做谱密度估计

根据Y-W方程得到的估计参数,算出谱密度估计为\(f(x)=\frac{1.045438}{2\pi|(1-0.8313212e^{iw}+0.1479688e^{2iw})|^2}\)(因为arima的随机性,实际上生成html的时候每次得到的估计值不一定一样,不过道理相同,用上面得到的估计方差和参数带入上式即可)

yw.spec<-function(phi1,X,fit.yw){    #输入真实的系数,输入序列(作YW估计)
  lams <- seq(0, pi, length=201)
  spec0 <- ft(lams,sigma=1,phi=phi1)
  plot(lams, spec0,lwd=3,type="l",lty=1,main="AR(2)序列理论谱密度图",xlab="frequency", ylab="")
  
  spec1 <- ar.true.spectrum(fit.yw$ar,ngrid=length(lams),sigma=sqrt(fit.yw$var.pred),plot.it=FALSE)$spectrum    #实际上相当于用ft函数算了一遍谱密度值
  lines(lams, spec1, col="green", lwd=2, lty=2)
  legend("topright", lwd=c(3,2), lty=c(1,2),col=c("black", "green"),
legend=c("True","Y-W得到的谱密度"))
}
yw.spec(phi=c(0.8,-0.1),X,fit.yw)

第四题

时间数据建模。考虑美国实际GDP(1941-2012年)数据,q-gdpmc1.txt。
1)画出时序图,考虑做对数差分GDP(GDP增长率)Yt
2) 画出Yt的时序图,做白噪声检验,并做均值是否为零的检验(t检验)。
3)画出Yt的acf和pacf图,确定AR模型,定阶并做估计

画出时序图

data<-read.table("D:/atas/atas_exp3/q-gdpmc1.txt",header=TRUE)
x = ts(data$gdp, start=c(1947,1),frequency = 4)
plot(x,main="美国实际GDP")

(2)做对数差分, 做白噪声、t检验

#做对数差分,画时序图
Y=diff(log(x))
plot(Y,main="GDP增长率")

#做白噪声检验
print(Box.test(Y,type = "Ljung-Box", lag = 6))
## 
##  Box-Ljung test
## 
## data:  Y
## X-squared = 55.586, df = 6, p-value = 3.53e-10
print(Box.test(Y,type = "Ljung-Box", lag = 12))
## 
##  Box-Ljung test
## 
## data:  Y
## X-squared = 64.259, df = 12, p-value = 3.737e-09
#做均值是否为零的检验(t检验)
t.test(Y,mu=0)
## 
##  One Sample t-test
## 
## data:  Y
## t = 12.786, df = 262, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.006573382 0.008966545
## sample estimates:
##   mean of x 
## 0.007769964

白噪声检验显示序列值彼此之间蕴涵着相关关系,为非白噪声序列。
t检验说明均值为0的假设不成立。

画出Yt的acf和pacf图,确定AR模型,定阶并做估计

library(zoo)
library(TSA)
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## 载入程辑包:'TSA'
## The following objects are masked from 'package:timeDate':
## 
##     kurtosis, skewness
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(forecast)
acf(Y)

pacf(Y)

#自动拟合
auto.arima(Y)
## Series: Y 
## ARIMA(3,0,1)(2,0,1)[4] with non-zero mean 
## 
## Coefficients:
##           ar1     ar2      ar3     ma1     sar1     sar2    sma1    mean
##       -0.2310  0.3256  -0.0171  0.5677  -0.6868  -0.0405  0.5850  0.0077
## s.e.   0.3059  0.1109   0.0829  0.2987   0.4448   0.0852  0.4386  0.0009
## 
## sigma^2 = 8.391e-05:  log likelihood = 864.99
## AIC=-1711.99   AICc=-1711.27   BIC=-1679.84
#BIC定阶
res=armasubsets(y=Y,nar=15,nma=15,y.name='test',ar.method='ols')
## Reordering variables and trying again:
plot(res)

#AIC定阶
ar(Y,method = "mle")
## 
## Call:
## ar(x = Y, method = "mle")
## 
## Coefficients:
##       1        2        3  
##  0.3475   0.1283  -0.1121  
## 
## Order selected 3  sigma^2 estimated as  8.17e-05

从 ACF 图中可以看到Lag=3之后,ACF大致控制在2倍标准差之内,可以判断该序列具有短期相关性,即认为该序列符合平稳序列的特征。且从PACF图可以看出AR序列“3阶截尾”,说明序列符合平稳性。此外利用单位根检验发现序列也是平稳的。
从 ACF 图和 PACF 图可以看出,自相关系数“拖尾”,偏自相关系数“3阶截尾”,因此初步确定拟合的模型为AR(3)模型。
采用auto.arima函数得到拟合为ARIMA(3,0,1)(2,0,1)[4]模型较合适,考虑AR模型的话可以选取AR(3).
采用TSA包的armasubsets函数发现选择AR(1)模型BIC值最小。
采用ar函数利用mle方法发现AR(3)模型AIC值最小。
综上结合题意,选择AR(3)模型,即定阶数为3。

#参数的极大似然估计
Y.fit = arima(Y, order=c(3,0,0), method = "ML")
Y.fit
## 
## Call:
## arima(x = Y, order = c(3, 0, 0), method = "ML")
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.3476  0.1281  -0.1121     0.0077
## s.e.  0.0612  0.0644   0.0613     0.0009
## 
## sigma^2 estimated as 8.17e-05:  log likelihood = 864.45,  aic = -1720.9

根据参数估计结果,写出模型的表达式:\(X_t-0.0077=\frac{\epsilon_t}{1-0.3476B-0.1281B^2+0.1121B^3},Var(\epsilon_t)=8.17*10^{-5}\)

检验模型

# 对残差序列 Y.fit$residuals 进行白噪声检验
for (i in 1:2) {
print(Box.test(Y.fit$residuals, lag = 6*i))
}
## 
##  Box-Pierce test
## 
## data:  Y.fit$residuals
## X-squared = 3.5054, df = 6, p-value = 0.7433
## 
## 
##  Box-Pierce test
## 
## data:  Y.fit$residuals
## X-squared = 12.369, df = 12, p-value = 0.4165
Y.fit
## 
## Call:
## arima(x = Y, order = c(3, 0, 0), method = "ML")
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.3476  0.1281  -0.1121     0.0077
## s.e.  0.0612  0.0644   0.0613     0.0009
## 
## sigma^2 estimated as 8.17e-05:  log likelihood = 864.45,  aic = -1720.9
# ar1 系数的显著性检验
t1 = 0.3476/0.0612
# 利用 P 值函数 pt()计算P值,其中自由度df为时间序列样本数减去未知参数个数,本案例中为 263-4=259
p1 = 2*pt(t1, df=259, lower.tail = F)
# ar2 系数的显著性检验
t2 = 0.1281/0.0644
p2 = 2*pt(t2, df=259, lower.tail = F)
# ar3 系数的显著性检验
t3 = -0.1121/0.0613 
p3 = 2*pt(t3, df=259, lower.tail = T)
# 常数的显著性检验
t0 = 0.0077/0.0009
p0 = 2*pt(t0, df=259, lower.tail = F)
p_value = data.frame(p1, p2,p3, p0)
row.names(p_value) = "P 值"
p_value
##                p1         p2         p3           p0
## P 值 3.616389e-08 0.04773877 0.06859282 1.044972e-15

由于各阶延迟下的LB统计量的P值都显著大于\(\alpha=0.05\),接受残差序列为白噪声序列的原假设,即该拟合模型在置信水平 95% 下显著有效。

检验结果显示,三个系数的P值基本都小于\(\alpha=0.05\)(除p3略大于0.05),拒绝系数等于0的原假设,因此在置信水平 95% 下均显著非零。

对未来十年进行预测

library(forecast)
Y.fit = stats::arima(Y, order=c(3,0,0), method = "ML")
#先调用forecast后调用TSA包会出现The following objects are masked from ‘package:stats’: acf, arima。而forecast包适配的是stat里的arima,不加的话Y.fit$series="x"
#先调用TSA后调用forecast包也可以解决该问题
Y.fore = forecast(Y.fit, h=20)
#Y.fore
plot(Y)

plot(Y.fore,data=Y)

#指数平滑法预测
Y.Holt <- HoltWinters(Y)
Yforecast.Holt<-forecast(Y.Holt,h=20)
plot(Yforecast.Holt)