require(SFSI)
data(wheatHTP)
index = which(Y$trial %in% 1:6) # Use only a subset of data
Y = Y[index,]
M = scale(M[index,])/sqrt(ncol(M)) # Subset and scale markers
G = tcrossprod(M) # Genomic relationship matrix
y = as.vector(scale(Y[,"E1"])) # Scale response variable
# Training and testing sets
tst = which(Y$trial == 2)
trn = which(Y$trial != 2)
# Calculate variance components ratio using training data
yNA = y
yNA[tst] = NA
fm0 = fitBLUP(yNA,K=G)
theta = fm0$varE/fm0$varU
h2 = fm0$varU/(fm0$varU + fm0$varE)
b = fm0$b # intercept
# Sparse selection index
fm1 = SSI(y,K=G,theta=theta,b=b,trn=trn,tst=tst)
summary(fm1)$optCOR
# \donttest{
if(requireNamespace("float")){
# Using a 'float' type variable for K
G2 = float::fl(G)
fm2 = SSI(y,K=G2,theta=theta,b=b,trn=trn,tst=tst)
summary(fm2)$optCOR # compare with above results
}
# }
#---------------------------------------------------
# Predicting a testing set using a value of lambda
# obtained from cross-validation in a traning set
#---------------------------------------------------
# \donttest{
# Run a cross validation in training set
fm2 = SSI.CV(y,K=G,theta=theta,b=b,trn=trn,nfolds=5,name="1 5CV")
lambda = summary(fm2)$optCOR["lambda"]
# Fit the index with the obtained lambda
fm3 = SSI(y,K=G,theta=theta,b=b,trn=trn,tst=tst,lambda=lambda)
summary(fm3)$accuracy # Testing set accuracy
# Compare the accuracy with that of the non-sparse index (G-BLUP)
cor(fm0$u[tst],y[tst])
# Obtain an 'optimal' lambda by repeating the CV several times
fm22 = SSI.CV(y,K=G,theta=theta,b=b,trn=trn,nCV=5,name="5 5CV")
plot(fm22,fm2)
# }
Run the code above in your browser using DataLab