# \donttest{
#DATA###########################################################################
data(sampledata)
dim(X) #Matrix of SNP genotypes (explanatory variables)
dim(Z) #Matrix of a fixed effect (explanatory variables)
length(Y) #Vector of response variables
#Fitting########################################################################
#Example 1: Fit SNP genotypes with BayesC
ETA <- list(list(model = "BayesC", X = X))
Result <- vigor(Y, ETA)
##see estimated SNP effects
plot(abs(Result$ETA[[1]]$Beta), pch = 20)
##10 SNPs at 1, 101, ..., 901 have non-zero effects
abline(v = seq(1, 1000, 100), col = 2, lty = 2)
##see inclusion probability
plot(Result$ETA[[1]]$Rho, pch = 20)
abline(v = seq(1, 1000, 100), col = 2, lty = 2)
##Intercept is added automatically as the last learner
Result$ETA[[2]]$Beta
#Example 2: Fit fixed effects and SNP genotypes
##There are two approaches to fit fixed effects
##(1) Create model matrix
Z #Z consists of three categories (A, B, and C)
Z.matrix <- model.matrix(~ Z)
head(Z.matrix) #The first column is the intercept
##Fit with EBL
ETA <- list(list(model = "FIXED", X = Z.matrix),
list(model = "EBL", X = X))
Result <- vigor(Y, ETA)
##Estimated fixed effects (intercept, B, and C)
Result$ETA[[1]]$Beta
##Estimated SNP effects
plot(abs(Result$ETA[[2]]$Beta), pch = 20)
abline(v = seq(1, 1000, 100), col = 2, lty = 2)
##NOTE: when FIXED is added by user, the intercept is not automatically added.
##Thus, variables in FIXED should contain the intercept.
##(2) Use formula
Data <- data.frame(Z = factor(Z))
ETA <- list(list(~ Z, model = "FIXED", data = Data),
list(model = "BayesA", X = X))
Result <- vigor(Y, ETA)
##Estimated fixed effects (intercept, B, and C)
Result$ETA[[1]]$Beta
plot(abs(Result$ETA[[2]]$Beta), pch = 20)
abline(v=seq(1,1000,100),col=2,lty=2)
##NOTE: formula automatically adds the intercept
#Example 3: Multiple regression methods in a single model
##Some SNPs in X have dominance (non-additive) effects
##Fit SNP genotypes coded as additive and dominance with different shrinkage levels
X.d <- X
X.d[X == 2] <- 0 #heterozygotes are 1 and homozygotes are 0
ETA <- list(list(~ Z, model = "FIXED", data = Data),
list(model = "BayesC", X = X, H = c(5, 0.1, 0.01)),
list(model = "BayesC", X = X.d, H = c(5, 0.1, 0.001)))
Result <- vigor(Y, ETA)
##Inclusion probability for additive effects
plot(Result$ETA[[2]]$Rho, pch = 20)
abline(v = seq(1, 1000, 100), col = 2, lty = 2)
##Inclusion probability for dominance effects
plot(Result$ETA[[3]]$Rho, pch = 20)
##SNPs at 1, 201, ..., 801 have non-zero effects
abline(v = seq(1, 1000, 200), col = 2, lty = 2)
##Fit additive and dominance effects with different learners
ETA <- list(list(~ Z, model = "FIXED", data = Data),
list(model = "BL", X = X, H = c(1, 0.01)),
list(model = "BayesC", X = X.d, H = c(5, 0.1, 0.001)))
Result <- vigor(Y, ETA)
plot(abs(Result$ETA[[2]]$Beta), pch = 20)
abline(v = seq(1, 1000, 100), col = 2, lty = 2)
plot(Result$ETA[[3]]$Rho, pch = 20)
abline(v = seq(1, 1000, 200), col = 2, lty = 2)
#Tuning hyperparameters#########################################################
#Example 4: Model fitting after hyperparameter tuning with cross-validation
##Candidate hyperparameter values are determined with hyperpara
##Use BayesB
ETA <- list(list(~ Z, model = "FIXED", data = Data),
list(model = "BayesB", X = X,
H = hyperpara(X, 0.5, "BayesB", c(0.1,0.01))))
Result <- vigor(Y, ETA, Function = "tuning")
##See the tuned result
Result$Metrics
##The model was fitted to the full data with the best set
Result$H
plot(Result$ETA[[2]]$Rho, pch = 20)
abline(v = seq(1, 1000, 100), col = 2, lty = 2)
##See how much the model was fitted
plot(Y, Result$Fittedvalue); abline(0, 1)
##When multiple learners used, all combinations of hyperparameter sets are compared
ETA <- list(list(~ Z, model = "FIXED", data = Data),
list(model = "BayesB", X = X,
H = hyperpara(X, 0.5, "BayesB", c(0.1,0.01))),
list(model = "BayesC", X = X.d,
H = hyperpara(X, 0.5, "BayesC", c(0.1,0.01))))
Result <- vigor(Y, ETA, Function = "tuning")
##BayesB and BayesC have two candidate sets, respectively.
##Thus, total 2 x 2 = 4 combinations are compared.
Result$Metrics
##The model was fitted to the full data with the best combination.
Result$H
plot(Result$ETA[[2]]$Rho, pch = 20)
abline(v = seq(1, 1000, 100), col = 2, lty = 2)
plot(Result$ETA[[3]]$Rho, pch = 20)
abline(v = seq(1, 1000, 200), col = 2, lty = 2)
#Cross-validation###############################################################
#Example 5: Cross-validation with random splitting
ETA <- list(list(~ Z, model = "FIXED", data = Data),
list(model = "BayesC", X = X,
H = hyperpara(X, 0.5, "BayesC", c(0.1,0.01))))
Result <- vigor(Y, ETA, Function="cv")
##Because two hyperparameter sets are provided,
##nested CV is conducted at each fold to tune hyperparameters
##See which set was selected at each fold
Result$Metrics
##See predicted values
plot(Y, Result$Prediction)
cor(Y, Result$Prediction)
#Example 6: Cross-validation with the specified splitting
##Perform CV using the same partition as Example 5. Use EBL
ETA <- list(list(~ Z, model = "FIXED", data = Data),
list(model = "EBL", X = X))
Result2 <- vigor(Y, ETA, Function="cv", Partition = Result$Partition)
plot(Y, Result2$Prediction)
cor(Y, Result2$Prediction)
# }
Run the code above in your browser using DataLab