############################################
############################################
# breeding values with 1 variance component
############################################
############################################
##== simulate data
##== random population of 200 lines with 1000 markers
M <- matrix(rep(0,200*1000),1000,200)
for (i in 1:200) {
M[,i] <- ifelse(runif(1000)<0.5,-1,1)
}
##== phenotypes
QTL <- 100*(1:5) #pick 5 QTL
u <- rep(0,1000) #marker effects
u[QTL] <- 1
g <- as.vector(crossprod(M,u))
h2 <- 0.5
y <- g + rnorm(200,mean=0,sd=sqrt((1-h2)/h2*var(g)))
M <- t(M)
##== fit the model
Z1 <- diag(length(y))
ETA <- list( list(Z=Z1, K=A.mat(M)))
ans <- mmer(y=y, Z=ETA, method="EMMA")
############################################
############################################
# GWAS with 1 variance component and one A matrix
############################################
############################################
ETA <- list( list(Z=Z1, K=A.mat(M))) # random effects for genotypes
ETA2 <- M # markers as fixed effects
# RUN IT:
#ans <- mmer(y=y, Z=ETA, W=ETA2, method="EMMA")
############################################
############################################
# breeding values with 3 variance components
# hybrid prediction
############################################
############################################
data(cornHybrid)
hybrid2 <- cornHybrid$hybrid # extract cross data
A <- cornHybrid$K
y <- hybrid2$Yield
X1 <- model.matrix(~ Location, data = hybrid2);dim(X1)
Z1 <- model.matrix(~ GCA1 -1, data = hybrid2);dim(Z1)
Z2 <- model.matrix(~ GCA2 -1, data = hybrid2);dim(Z2)
Z3 <- model.matrix(~ SCA -1, data = hybrid2);dim(Z3)
K1 <- A[levels(hybrid2$GCA1), levels(hybrid2$GCA1)]; dim(K1)
# Realized IBS relationships for set of parents 1
K2 <- A[levels(hybrid2$GCA2), levels(hybrid2$GCA2)]; dim(K2)
# Realized IBS relationships for set of parents 2
S <- kronecker(K1, K2) ; dim(S)
rownames(S) <- colnames(S) <- levels(hybrid2$SCA)
# Realized IBS relationships for cross
#(as the Kronecker product of K1 and K2)
ETA <- list(list(Z=Z1, K=K1), list(Z=Z2, K=K2), list(Z=Z3, K=S))
# run the next line, ommited for CRAN time limitations
# ans <- mmer(y=y, X=X1, Z=ETA)
# ans$var.comp
# summary(ans)
#############################
## COMPARE WITH MCMCglmm ##
#############################
##== the same model run in MCMCglmm:
#library(MCMCglmm)
# pro <- list(GCA1 = as(solve(K1), "sparseMatrix"), GCA2 = as(solve(K2),
# + "sparseMatrix"), SCA = as(solve(S), "sparseMatrix") )
#system.time(mox <- MCMCglmm(Yield ~ Location, random = ~ GCA1 + GCA2 + SCA,
# + data = hybrid2, verbose = T, ginverse=pro))
## Takes 7:13 minutes in MCMCglmm, in sommer only takes 7 seconds
## it is also possible to do GWAS for hybrids, separatting and accounting
## for effects of GCA1, GCA2, SCA
#############################
## COMPARE WITH pedigreemm ##
#############################
# library(pedigreemm)
#A <- as.matrix(getA(pedCowsR))
#y <- milk$milk
#Z1 <- model.matrix(~id-1, data=milk); dim(Z1)
#vv <- match(unique(milk$id), gsub("id","",colnames(Z1)))
#K1<- A[vv,vv]; dim(K1)
#Z2 <- model.matrix(~as.factor(herd)-1, data=milk); dim(Z2)
#ETA<- list(list(Z=Z1, K=K1),list(Z=Z2))
#fm3 <- mmer(y=y, Z=ETA) # or using mmer2 would look:
#fm3 <- mmer2(fixed=milk ~ 1, random = ~ id + herd,
# + G=list(id=K1), data=milk, draw=FALSE)
#summary(fm3)
# Try pedigreemm but takes longer, is an extension of lme4
#fm2 <- pedigreemm(milk ~ (1 | id) + (1 | herd),data = milk, pedigree = list(id= pedCowsR))
#plot(fm3$u.hat[[1]], ranef(fm2)$id[,1])
#plot(fm3$u.hat[[2]], ranef(fm2)$herd[,1])
# a big data frame with 3397 rows and 1359 animals analyzed
# pedigreemm takes 4 min, sommer takes 1 minute
#####################################
## PREDICTING SPECIFIC PERFORMANCE ##
## within biparental population ##
#####################################
#data(CPdata)
#CPpheno <- CPdata$pheno
#CPgeno <- CPdata$geno
## look at the data
#head(CPpheno)
#CPgeno[1:5,1:5]
## fit a model including additive and dominance effects
#y <- CPpheno$color
#Za <- diag(length(y))
#Zd <- diag(length(y))
#A <- A.mat(CPgeno)
#D <- D.mat(CPgeno)
#y.trn <- y # for prediction accuracy
#ww <- sample(c(1:dim(Za)[1]),72) # delete data for 1/5 of the population
#y.trn[ww] <- NA
### ADDITIVE MODEL ###
#ETA.A <- list(list(Z=Za,K=A))
#ans.A <- mmer(y=y.trn, Z=ETA.A)
#cor(ans.A$fitted.y[ww], y[ww], use="pairwise.complete.obs")
### ADDITIVE-DOMINANT MODEL ###
#ETA.AD <- list(list(Z=Za,K=A),list(Z=Zd,K=D))
#ans.AD <- mmer(y=y.trn, Z=ETA.AD)
#cor(ans.AD$fitted.y[ww], y[ww], use="pairwise.complete.obs")
### 0.63 accuracy !!!! 4 percent increment!!
Run the code above in your browser using DataLab