\[Y_{n*q}=X_{n*p}B_{p*r}A_{q*r}^T+E_{n*q}\]
#变分法
SRRR2<-function(X,Y,lambda,r){
#先读入维度
n=nrow(X)
p=ncol(X)
q=ncol(Y)
int<-0
B<-matrix(0,ncol = r,nrow = p)
for(i in 1:r){B[i,i]<-1}
Bhat<-matrix(0,ncol = r,nrow = p)
B0<-matrix(1,ncol = r,nrow = p)#差矩阵
check=c(10000,0)#用来储存目标估计式的值
while(abs(check[1]-check[2])>0.01){#不收敛先固定B,更新A
A<-svd(t(Y)%*%X%*%B)$u%*%t(svd(t(Y)%*%X%*%B)$v)
while(norm(B0,"F")>0.01){#此时检查B是否收敛
mu<-c()
lammu<-c()
for(i in 1:p){
mu[i]<-1/(norm(as.matrix(B[i,]),"F")+0.00000001)
lammu[i]<-lambda[i]*mu[i]
}
Bhat<-solve(t(X)%*%X+diag(lammu,nrow=p,ncol=p)/2)%*%t(X)%*%Y%*%A
B0<-B-Bhat
B<-Bhat
int<-int+1
if(int==5000) return(list(A,B,int))#判定不收敛
}
c=0
for(i in 1:p){#检查目标式收敛
c=c+lambda[i]*norm(as.matrix(B[i,]),"F")
}
check[1]<-check[2]
check[2]<-norm(Y-X%*%B%*%t(A),"2")+c
}
return(list(A,B,int))
}
#次梯度法
SRRR<-function(X,Y,lambda,r){
#先读入维度
n=nrow(X)
p=ncol(X)
q=ncol(Y)
f0<-c()
for(l in 1:p){
f0[l]<-t(X[,l])%*%X[,l]+0.000001
}
int<-0
#用无惩罚项的降秩模型估计值作为AB的初值
Sxx<-t(X)%*%X/n
Sxy<-t(X)%*%Y/n
Syx<-t(Y)%*%X/n
S<-Syx%*%solve(Sxx)%*%Sxy
A<-eigen(S)$vector[,1:r]
A<-as.matrix(A)
B<-solve(Sxx)%*%Sxy%*%A
Bhat<-matrix(0,ncol = r,nrow = p)#用来储存每次迭代中的结果
B0<-matrix(1,ncol = r,nrow = p)#差矩阵
check=c(10000,0)#用来储存目标估计式的值
while(abs(check[1]-check[2])>0.01){#不收敛先固定B,更新A
A<-svd(t(Y)%*%X%*%B)$u%*%t(svd(t(Y)%*%X%*%B)$v)
while(norm(B0,"F")>0.01){#此时检查B是否收敛
for(l in 1:p){#不收敛则固定A,更新B
Rl<-Y%*%A-X%*%B+X[,l]%*%t(B[l,])
s<-lambda[l]/2/(norm(t(X[,l])%*%Rl,"F")+0.000001)
if(s<1){
Bhat[l,]<-t(X[,l])%*%Rl*(1-s)/f0[l]
}else Bhat[l,]<-rep(0,r)
}
B0<-B-Bhat
B<-Bhat
int<-int+1
if(int==500) return(list(A,B,int))#判定不收敛
}
c=0
for(i in 1:p){#检查目标式收敛
c=c+lambda[i]*norm(as.matrix(B[i,]),"F")
}
check[1]<-check[2]
check[2]<-norm(Y-X%*%B%*%t(A),"2")+c
}
return(list(A,B,int))
}
#SRRR函数cv版本 lambda的选择范围51-70 设置在函数内部
cv.SRRR<-function(X,Y){
K<-5
lmin<-51
lmax<-70
n=nrow(X)
p=ncol(X)
q=ncol(Y)
error<-matrix(0,nrow = 100,ncol = q)
foldid<-sample(rep(1:K,length = n))
el<-c()
rr<-c()
for(l in lmin:lmax){
lambda=rep(l,p)
for(r in 1:q){
for(i in 1:K){
Xh<-X[foldid != i,]
Yh<-Y[foldid != i,]
Xp<-X[foldid == i,]
Yp<-Y[foldid == i,]
result<-SRRR(Xh,Yh,lambda,r)
print(result[[3]])
A1=result[[1]]
B1=result[[2]]
if(result[[3]]==0) {error[l-lmin+1,r]=1000000
}else error[l-lmin+1,r]<-error[l-lmin+1,r]+norm(Yp-Xp%*%(B1%*%t(A1)),"F")^2/1000
}
}
rr[l-lmin+1]<-which.min(error[l-lmin+1,])
el[l-lmin+1]<-error[l-lmin+1,rr[l-lmin+1]]
}
lam<-which.min(el)+lmin-1
return(list(lam,rr[lam-lmin+1]))
}
#SRRR2函数cv版本
cv.SRRR2<-function(X,Y){
K<-5
lmin<-35
lmax<-60
n=nrow(X)
p=ncol(X)
q=ncol(Y)
error<-matrix(0,nrow = 100,ncol = q)
foldid<-sample(rep(1:K,length = n))
el<-c()
rr<-c()
for(l in lmin:lmax){
lambda=rep(l,p)
for(r in 1:q){
for(i in 1:K){
Xh<-X[foldid != i,]
Yh<-Y[foldid != i,]
Xp<-X[foldid == i,]
Yp<-Y[foldid == i,]
result<-SRRR2(Xh,Yh,lambda,r)
print(result[[3]])
A1=result[[1]]
B1=result[[2]]
if(result[[3]]==0) {error[l-lmin+1,r]=1000000
}else error[l-lmin+1,r]<-error[l-lmin+1,r]+norm(Yp-Xp%*%(B1%*%t(A1)),"F")^2/1000
}
}
rr[l-lmin+1]<-which.min(error[l-lmin+1,])
el[l-lmin+1]<-error[l-lmin+1,rr[l-lmin+1]]
}
lam<-which.min(el)+lmin-1
return(list(lam,rr[lam-lmin+1]))
}
#这是1a情况,暂时认为trace(E)指的是E的协方差阵
library(MASS)
set.seed(12345)
n<-100
p=30
p0=10
q=10
r=3
rowx=0
rowe=0
sigmaX<-diag(1,nrow = p,ncol = p)
X<-mvrnorm(n,rep(0,p),sigmaX)
Bt<-matrix(c(rnorm(p0*r,0,1),rep(0,(p-p0)*r)),nrow = r,ncol = p)
B<-t(Bt)
A<-matrix(rnorm(q*r,0,1),nrow = q,ncol = r)
C=B%*%t(A)
sigma2<-sum(diag((t(C)%*%C)))/q
sigmaE<-diag(sqrt(sigma2),nrow = q,ncol = q)
E<-mvrnorm(n,rep(0,q),sigmaE)
Y<-X%*%B%*%t(A)+E
#这是调用Spls
library(pls)
##
## 载入程辑包:'pls'
## The following object is masked from 'package:stats':
##
## loadings
library(spls)
## Sparse Partial Least Squares (SPLS) Regression and
## Classification (version 2.2-3)
cv<-cv.spls(X,Y,fold = 5,K=c(1:p),eta=seq(0.1,0.9,0.1))
## eta = 0.1
## eta = 0.2
## eta = 0.3
## eta = 0.4
## eta = 0.5
## eta = 0.6
## eta = 0.7
## eta = 0.8
## eta = 0.9
##
## Optimal parameters: eta = 0.5, K = 5
cv$eta.opt
## [1] 0.5
cv$K.opt
## [1] 5
f<-spls(X,Y,cv$K.opt,cv$eta.opt)
norm(X%*%(f$betahat-C),"F")^2/n/q
## [1] 0.6012886
library(rrpack)
sr1_1<-cv.srrr(Y,X,nrank=3,method="glasso",nfold = 5)
norm(X%*%(sr1_1$coef-C),"F")^2/n/q
## [1] 0.3569562
sr1_2<-cv.srrr(Y,X,nrank=3,method="adglasso",nfold = 5)
norm(X%*%(sr1_2$coef-C),"F")^2/n/q
## [1] 0.2966013
#cv.SRRR(X,Y)
#result is 51,3
lambda<-rep(51,p)
result1<-SRRR(X,Y,lambda,3)
A1=result1[[1]]
B1=result1[[2]]
result1[[3]]
## [1] 13
norm(X%*%(B1%*%t(A1)-C),"F")^2/n/q
## [1] 0.3154301
#lasso
library(glmnet)
## 载入需要的程辑包:Matrix
## Loaded glmnet 4.1-3
las<-cv.glmnet(X,Y,family = "mgaussian")
lcoef<-coef(las$glmnet.fit,s=las$lambda.1se,exact = F,foldid=5)
l<-matrix(0,nrow = p,ncol = q)
for(j in 1:q){
l1<-lcoef[[j]]@i
l2<-lcoef[[j]]@x
l1<-l1[-1]
l2<-l2[-1]
l[l1,j]=l2
}
norm(X%*%(l-C),"F")^2/n/q
## [1] 0.6439364
setwd('H:/360MoveData/Users/user/Documents')
system("R CMD SHLIB remMap.c")
## [1] 0
dyn.load("remMap.dll")
#lambda1<-rep(11,p)#lasso
#lambda2<-rep(19,p)#group lasso
#rem<-remMap(X,Y,lambda1,lambda2)
#norm(X%*%(rem$phi-C),"F")^2/n/q
#0.5895708
#lambda1<-rep(0,p)
#lambda2<-rep(44,p)
#rem<-remMap(X,Y,lambda1,lambda2)
#norm(X%*%(rem$phi-C),"F")^2/n/q
#0.5842402
set.seed(12345)
n<-100
p=30
p0=10
q=10
r=10
rowx=0
rowe=0
sigmaX<-diag(1,nrow = p,ncol = p)
X<-mvrnorm(n,rep(0,p),sigmaX)
Bt<-matrix(c(rnorm(p0*r,0,1),rep(0,(p-p0)*r)),nrow = r,ncol = p)
B<-t(Bt)
A<-matrix(rnorm(q*r,0,1),nrow = q,ncol = r)
C=B%*%t(A)
sigma2<-sum(diag((t(C)%*%C)))/q
sigmaE<-diag(sqrt(sigma2),nrow = q,ncol = q)
E<-mvrnorm(n,rep(0,q),sigmaE)
Y<-X%*%B%*%t(A)+E
lambda<-rep(110,p)
result1<-SRRR(X,Y,lambda,r)
A1=result1[[1]]
B1=result1[[2]]
result1[[3]]
## [1] 106
norm(X%*%(B1%*%t(A1)-C),"F")^2/n/q
## [1] 1.630721
lambda<-rep(20,p)
result1<-SRRR2(X,Y,lambda,r)
A1=result1[[1]]
B1=result1[[2]]
result1[[3]]
## [1] 4
norm(X%*%(B1%*%t(A1)-C),"F")^2/n/q
## [1] 1.074774
library(pls)
library(spls)
cv<-cv.spls(X,Y,fold = 5,K=c(1:p),eta=seq(0.1,0.9,0.1))
## eta = 0.1
## eta = 0.2
## eta = 0.3
## eta = 0.4
## eta = 0.5
## eta = 0.6
## eta = 0.7
## eta = 0.8
## eta = 0.9
##
## Optimal parameters: eta = 0.9, K = 10
f<-spls(X,Y,cv$K.opt,cv$eta.opt)
norm(X%*%(f$betahat-C),"F")^2/n/q
## [1] 1.379994
library(rrpack)
sr1_1<-cv.srrr(Y,X,nrank=10,method="glasso",nfold = 5)
norm(X%*%(sr1_1$coef-C),"F")^2/n/q
## [1] 1.889194
sr1_2<-cv.srrr(Y,X,nrank=10,method="adglasso",nfold = 5)
norm(X%*%(sr1_2$coef-C),"F")^2/n/q
## [1] 1.176664
#lambda1<-rep(10,p)#lasso
#lambda2<-rep(40,p)#group lasso
#rem<-remMap(X,Y,lambda1,lambda2)
#norm(X%*%(rem$phi-C),"F")^2/n/q
#1.764612
#lasso
library(glmnet)
las<-cv.glmnet(X,Y,family = "mgaussian")
lcoef<-coef(las$glmnet.fit,s=las$lambda.1se,exact = F,foldid=5)
l<-matrix(0,nrow = p,ncol = q)
for(j in 1:q){
l1<-lcoef[[j]]@i
l2<-lcoef[[j]]@x
l1<-l1[-1]
l2<-l2[-1]
l[l1,j]=l2
}
norm(X%*%(l-C),"F")^2/n/q
## [1] 2.307772
set.seed(12345)
n<-100
p=30
p0=10
q=10
r=3
rowx=0.5
rowe=0
sigmaX<-matrix(rep(rowx,p*p),nrow = p,ncol = p)
diag(sigmaX)<-1
X<-mvrnorm(n,rep(0,p),sigmaX)
Bt<-matrix(c(rnorm(p0*r,0,1),rep(0,(p-p0)*r)),nrow = r,ncol = p)
B<-t(Bt)
A<-matrix(rnorm(q*r,0,1),nrow = q,ncol = r)
C=B%*%t(A)
sigma2<-sum(diag((t(C)%*%sigmaX%*%C)))/q
sigmaE<-diag(sqrt(sigma2),nrow = q,ncol = q)
E<-mvrnorm(n,rep(0,q),sigmaE)
Y<-X%*%B%*%t(A)+E
#次梯度法失效了 变分法表现也很差
lambda<-rep(20,p)
result1<-SRRR2(X,Y,lambda,r)
A1=result1[[1]]
B1=result1[[2]]
result1[[3]]
## [1] 18
norm(X%*%(B1%*%t(A1)-C),"F")^2/n/q
## [1] 0.9384551
#lambda1<-rep(6,p)#lasso
#lambda2<-rep(7,p)#group lasso
#rem<-remMap(X,Y,lambda1,lambda2)
#norm(X%*%(rem$phi-C),"F")^2/n/q
#0.5668705
library(pls)
library(spls)
cv<-cv.spls(X,Y,fold = 5,K=c(1:p),eta=seq(0.1,0.9,0.1))
## eta = 0.1
## eta = 0.2
## eta = 0.3
## eta = 0.4
## eta = 0.5
## eta = 0.6
## eta = 0.7
## eta = 0.8
## eta = 0.9
##
## Optimal parameters: eta = 0.9, K = 9
f<-spls(X,Y,cv$K.opt,cv$eta.opt)
norm(X%*%(f$betahat-C),"F")^2/n/q
## [1] 0.7368045
library(rrpack)
sr1_1<-cv.srrr(Y,X,nrank=3,method="glasso",nfold = 5)
norm(X%*%(sr1_1$coef-C),"F")^2/n/q
## [1] 0.3057546
sr1_2<-cv.srrr(Y,X,nrank=3,method="adglasso",nfold = 5)
norm(X%*%(sr1_2$coef-C),"F")^2/n/q
## [1] 0.2747283
#lasso
library(glmnet)
las<-cv.glmnet(X,Y,family = "mgaussian")
lcoef<-coef(las$glmnet.fit,s=las$lambda.1se,exact = F,foldid=5)
l<-matrix(0,nrow = p,ncol = q)
for(j in 1:q){
l1<-lcoef[[j]]@i
l2<-lcoef[[j]]@x
l1<-l1[-1]
l2<-l2[-1]
l[l1,j]=l2
}
norm(X%*%(l-C),"F")^2/n/q
## [1] 0.7111148
set.seed(12345)
n<-100
p=30
p0=10
q=10
r=3
rowx=0
rowe=0.5
sigmaX<-diag(1,nrow = p,ncol = p)
X<-mvrnorm(n,rep(0,p),sigmaX)
Bt<-matrix(c(rnorm(p0*r,0,1),rep(0,(p-p0)*r)),nrow = r,ncol = p)
B<-t(Bt)
A<-matrix(rnorm(q*r,0,1),nrow = q,ncol = r)
C=B%*%t(A)
sigma2<-sum(diag((t(C)%*%sigmaX%*%C)))/q
sigmaE<-matrix(rep(rowe,q*q),nrow = q,ncol = q)
diag(sigmaE)=rep(1,q)
E<-mvrnorm(n,rep(0,q),sigma2*sigmaE)
Y<-X%*%B%*%t(A)+E
lambda<-rep(160,p)
result1<-SRRR(X,Y,lambda,r)
A1=result1[[1]]
B1=result1[[2]]
result1[[3]]
## [1] 10
norm(X%*%(B1%*%t(A1)-C),"F")^2/n/q
## [1] 3.058742
lambda<-rep(30,p)
result1<-SRRR2(X,Y,lambda,r)
A1=result1[[1]]
B1=result1[[2]]
result1[[3]]
## [1] 24
norm(X%*%(B1%*%t(A1)-C),"F")^2/n/q
## [1] 5.281752
#lambda1<-rep(30,p)#lasso
#lambda2<-rep(30,p)#group lasso
#rem<-remMap(X,Y,lambda1,lambda2)
#norm(X%*%(rem$phi-C),"F")^2/n/q
#2.785652
library(pls)
library(spls)
cv<-cv.spls(X,Y,fold = 5,K=c(1:p),eta=seq(0.1,0.9,0.1))
## eta = 0.1
## eta = 0.2
## eta = 0.3
## eta = 0.4
## eta = 0.5
## eta = 0.6
## eta = 0.7
## eta = 0.8
## eta = 0.9
##
## Optimal parameters: eta = 0.9, K = 8
f<-spls(X,Y,cv$K.opt,cv$eta.opt)
norm(X%*%(f$betahat-C),"F")^2/n/q
## [1] 3.426848
library(rrpack)
sr1_1<-cv.srrr(Y,X,nrank=3,method="glasso",nfold = 5)
norm(X%*%(sr1_1$coef-C),"F")^2/n/q
## [1] 4.16516
sr1_2<-cv.srrr(Y,X,nrank=3,method="adglasso",nfold = 5)
norm(X%*%(sr1_2$coef-C),"F")^2/n/q
## [1] 3.822155
#lasso
library(glmnet)
las<-cv.glmnet(X,Y,family = "mgaussian")
lcoef<-coef(las$glmnet.fit,s=las$lambda.1se,exact = F,foldid=5)
l<-matrix(0,nrow = p,ncol = q)
for(j in 1:q){
l1<-lcoef[[j]]@i
l2<-lcoef[[j]]@x
l1<-l1[-1]
l2<-l2[-1]
l[l1,j]=l2
}
norm(X%*%(l-C),"F")^2/n/q
## [1] 3.948478
set.seed(12345)
n<-100
p=100
p0=30
q=10
r=3
rowx=0
rowe=0
sigmaX<-diag(1,nrow = p,ncol = p)
X<-mvrnorm(n,rep(0,p),sigmaX)
Bt<-matrix(c(rnorm(p0*r,0,1),rep(0,(p-p0)*r)),nrow = r,ncol = p)
B<-t(Bt)
A<-matrix(rnorm(q*r,0,1),nrow = q,ncol = r)
C=B%*%t(A)
sigma2<-sum(diag((t(C)%*%C)))/q
sigmaE<-diag(sqrt(sigma2),nrow = q,ncol = q)
E<-mvrnorm(n,rep(0,q),sigmaE)
Y<-X%*%B%*%t(A)+E
#这是调用Spls
library(pls)
library(spls)
cv<-cv.spls(X,Y,fold = 5,K=c(20:50),eta=seq(0.1,0.9,0.1))
## eta = 0.1
## eta = 0.2
## eta = 0.3
## eta = 0.4
## eta = 0.5
## eta = 0.6
## eta = 0.7
## eta = 0.8
## eta = 0.9
##
## Optimal parameters: eta = 0.8, K = 20
cv$eta.opt
## [1] 0.8
cv$K.opt
## [1] 20
f<-spls(X,Y,cv$K.opt,cv$eta.opt)
norm(X%*%(f$betahat-C),"F")^2/n/q
## [1] 2.22392
#cv.SRRR2(X,Y)
#36,3
lambda<-rep(36,p)
result1<-SRRR2(X,Y,lambda,r)
A1=result1[[1]]
B1=result1[[2]]
result1[[3]]
## [1] 33
norm(X%*%(B1%*%t(A1)-C),"F")^2/n/q
## [1] 1.037124
#这里CV没有选择到实际误差最小的,但是很接近了
lambda<-rep(49,p)
result1<-SRRR2(X,Y,lambda,r)
A1=result1[[1]]
B1=result1[[2]]
result1[[3]]
## [1] 35
norm(X%*%(B1%*%t(A1)-C),"F")^2/n/q
## [1] 1.009248
library(rrpack)
sr1_1<-cv.srrr(Y,X,nrank=3,method="glasso",nfold = 5)
norm(X%*%(sr1_1$coef-C),"F")^2/n/q
## [1] 0.9806119
sr1_2<-cv.srrr(Y,X,nrank=3,method="adglasso",nfold = 5)
norm(X%*%(sr1_2$coef-C),"F")^2/n/q
## [1] 1.372878
#这里我认为结果来自于算法对收敛的判定上
#lasso
library(glmnet)
las<-cv.glmnet(X,Y,family = "mgaussian")
lcoef<-coef(las$glmnet.fit,s=las$lambda.1se,exact = F,foldid=5)
l<-matrix(0,nrow = p,ncol = q)
for(j in 1:q){
l1<-lcoef[[j]]@i
l2<-lcoef[[j]]@x
l1<-l1[-1]
l2<-l2[-1]
l[l1,j]=l2
}
norm(X%*%(l-C),"F")^2/n/q
## [1] 2.626067
set.seed(12345)
n<-100
p=300
p0=30
q=30
r=3
rowx=0
rowe=0
sigmaX<-diag(1,nrow = p,ncol = p)
X<-mvrnorm(n,rep(0,p),sigmaX)
Bt<-matrix(c(rnorm(p0*r,0,1),rep(0,(p-p0)*r)),nrow = r,ncol = p)
B<-t(Bt)
A<-matrix(rnorm(q*r,0,1),nrow = q,ncol = r)
C=B%*%t(A)
sigma2<-sum(diag((t(C)%*%C)))/q
sigmaE<-diag(sqrt(sigma2),nrow = q,ncol = q)
E<-mvrnorm(n,rep(0,q),sigmaE)
Y<-X%*%B%*%t(A)+E
lambda<-rep(30,p)
result1<-SRRR2(X,Y,lambda,r)
A1=result1[[1]]
B1=result1[[2]]
result1[[3]]
## [1] 50
norm(X%*%(B1%*%t(A1)-C),"F")^2/n/q
## [1] 3.571174
library(rrpack)
sr1_1<-cv.srrr(Y,X,nrank=3,method="glasso",nfold = 5)
norm(X%*%(sr1_1$coef-C),"F")^2/n/q
## [1] 1.058996
sr1_2<-cv.srrr(Y,X,nrank=3,method="adglasso",nfold = 5)
norm(X%*%(sr1_2$coef-C),"F")^2/n/q
## [1] 1.059035
#lasso
library(glmnet)
las<-cv.glmnet(X,Y,family = "mgaussian")
lcoef<-coef(las$glmnet.fit,s=las$lambda.1se,exact = F,foldid=5)
l<-matrix(0,nrow = p,ncol = q)
for(j in 1:q){
l1<-lcoef[[j]]@i
l2<-lcoef[[j]]@x
l1<-l1[-1]
l2<-l2[-1]
l[l1,j]=l2
}
norm(X%*%(l-C),"F")^2/n/q
## [1] 5.261364
set.seed(12345)
n<-100
p=100
p0=100
q=10
r=10
rowx=0
rowe=0
sigmaX<-diag(1,nrow = p,ncol = p)
X<-mvrnorm(n,rep(0,p),sigmaX)
Bt<-matrix(c(rnorm(p0*r,0,1),rep(0,(p-p0)*r)),nrow = r,ncol = p)
B<-t(Bt)
A<-matrix(rnorm(q*r,0,1),nrow = q,ncol = r)
C=B%*%t(A)
sigma2<-sum(diag((t(C)%*%C)))/q
sigmaE<-diag(sqrt(sigma2),nrow = q,ncol = q)
E<-mvrnorm(n,rep(0,q),sigmaE)
Y<-X%*%B%*%t(A)+E
lambda<-rep(50,p)
result1<-SRRR2(X,Y,lambda,r)
A1=result1[[1]]
B1=result1[[2]]
result1[[3]]
## [1] 17
norm(X%*%(B1%*%t(A1)-C),"F")^2/n/q
## [1] 23.27391
library(rrpack)
sr1_1<-cv.srrr(Y,X,nrank=10,method="glasso",nfold = 5)
norm(X%*%(sr1_1$coef-C),"F")^2/n/q
## [1] 23.26146
sr1_2<-cv.srrr(Y,X,nrank=10,method="adglasso",nfold = 5)
norm(X%*%(sr1_2$coef-C),"F")^2/n/q
## [1] 43.20298
#lasso
library(glmnet)
las<-cv.glmnet(X,Y,family = "mgaussian")
lcoef<-coef(las$glmnet.fit,s=las$lambda.1se,exact = F,foldid=5)
l<-matrix(0,nrow = p,ncol = q)
for(j in 1:q){
l1<-lcoef[[j]]@i
l2<-lcoef[[j]]@x
l1<-l1[-1]
l2<-l2[-1]
l[l1,j]=l2
}
norm(X%*%(l-C),"F")^2/n/q
## [1] 25.90167