Learn R Programming

plsRglm (version 0.3.3)

plsRglm: Partial least squares Regression generalized linear models

Description

This function implements Partial least squares Regression generalized linear models for complete or incomplete datasets.

Usage

plsRglm(dataY, dataX, nt = 2, limQ2set = 0.0975, dataPredictY = dataX, modele = "pls", family = NULL, typeVC = "none", EstimXNA = FALSE, scaleX = TRUE, scaleY = NULL, pvals.expli = FALSE, alpha.pvals.expli = 0.05, MClassed = FALSE, tol_Xi = 10^(-12))

Arguments

dataY
response (training) dataset
dataX
predictor(s) (training) dataset
nt
number of components to be extracted
limQ2set
limit value for the Q2
dataPredictY
predictor(s) (testing) dataset
modele
name of the PLS glm model to be fitted ("pls", "pls-glm-gaussian", "pls-glm-logistic", "pls-glm-polr").
family
for the present moment the family argument is ignored and set thanks to the value of modele.
typeVC
type of leave-one-out cross-validation. Kept for backward compatibility. Accepted values: "none" (the default), "standard", "missingdata", "adaptative".
EstimXNA
only for modele="pls". Set whether the missing X values have to be estimated.
scaleX
scale the predictor(s) : must be set to TRUE for modele="pls" and should be for glms pls.
scaleY
scale the response : Yes/No. Ignored since not always possible for glm responses.
pvals.expli
should individual p-values be reported to tune model selection ?
alpha.pvals.expli
level of significance for predictors when pvals.expli=TRUE
MClassed
number of misclassified cases; should only be used for binary responses
tol_Xi
minimal value for Norm2(Xi) and $\mathrm{det}(pp' \times pp)$ if there is any missing value in the dataX. It defaults to $10^{-12}$

Value

  • The contents of the returned value depend on the model that was fitted.

Details

There are four different models available: "pls", "pls-glm-gaussian", "pls-glm-logistic" and "pls-glm-polr" (selected through the modele argument).

References

Nicolas Meyer, Myriam Maumy-Bertrand et Frédéric Bertrand (2010). Comparaison de la régression PLS et de la régression logistique PLS : application aux données d'allélotypage. Journal de la Société Française de Statistique, 151(2), pages 1-18. http://smf4.emath.fr/Publications/JSFdS/151_2/pdf/sfds_jsfds_151_2_1-18.pdf

See Also

plsR

Examples

Run this code
# Cornell data: columns 1-7 are used as predictors, column 8 as the response.
data(Cornell)
XCornell <- Cornell[, 1:7]
yCornell <- Cornell[, 8]

# Default model (modele = "pls") with 3 components: inspect several parts
# of the returned object.
plsRglm(yCornell, XCornell, nt = 3)$uscores
plsRglm(yCornell, XCornell, nt = 3)$pp
plsRglm(yCornell, XCornell, nt = 3)$Coeffs

# Information criteria when 10 components are requested, for the default
# model and for the gaussian PLS glm.
plsRglm(yCornell, XCornell, nt = 10)$InfCrit
plsRglm(yCornell, XCornell, nt = 10, modele = "pls-glm-gaussian")$InfCrit

# Clean up the objects created by this example.
rm(XCornell, yCornell)

# pine data: columns 1-10 are used as predictors, column 11 as the response,
# which is modelled on the log scale below.
data(pine)
Xpine <- pine[, 1:10]
ypine <- pine[, 11]

# Standardized and raw coefficients with 1 and then 4 components.
plsRglm(log(ypine), Xpine, nt = 1)$Std.Coeffs
plsRglm(log(ypine), Xpine, nt = 1)$Coeffs
plsRglm(log(ypine), Xpine, nt = 4)$Std.Coeffs
plsRglm(log(ypine), Xpine, nt = 4)$Coeffs

# First row of PredictY, with the default dataPredictY (= dataX) and with
# only the first observation supplied as the testing dataset.
plsRglm(log(ypine), Xpine, nt = 4)$PredictY[1, ]
plsRglm(log(ypine), Xpine, nt = 4, dataPredictY = Xpine[1, ])$PredictY[1, ]

# Copy of the pine predictors with a single missing value at row 1, column 2.
XpineNAX21 <- Xpine
XpineNAX21[1, 2] <- NA

# Structure of the fitted object when the predictors contain a missing value.
str(plsRglm(log(ypine), XpineNAX21, nt = 2))

# Compare results obtained with the incomplete and the complete predictors.
plsRglm(log(ypine), XpineNAX21, nt = 4)$Std.Coeffs
plsRglm(log(ypine), XpineNAX21, nt = 4)$YChapeau[1, ]
plsRglm(log(ypine), Xpine, nt = 4)$YChapeau[1, ]
plsRglm(log(ypine), XpineNAX21, nt = 4)$CoeffC

# EstimXNA = TRUE (only for modele = "pls") asks for the missing X values
# to be estimated.
plsRglm(log(ypine), XpineNAX21, nt = 4, EstimXNA = TRUE)$XChapeau
plsRglm(log(ypine), XpineNAX21, nt = 4, EstimXNA = TRUE)$XChapeauNA

# Compare pls-glm-gaussian with classic plsR: standardized coefficients
# of both fits side by side.
cbind(
  plsRglm(log(ypine), Xpine, nt = 4, modele = "pls")$Std.Coeffs,
  plsRglm(log(ypine), Xpine, nt = 4, modele = "pls-glm-gaussian")$Std.Coeffs
)

# Without missing data: observed log response next to the YChapeau component
# of both models.
cbind(
  log(ypine),
  plsRglm(log(ypine), Xpine, nt = 4, modele = "pls")$YChapeau,
  plsRglm(log(ypine), Xpine, nt = 4, modele = "pls-glm-gaussian")$YChapeau
)

# With missing data (XpineNAX21 holds one NA): same comparison for the
# YChapeau component, then for the ValsPredictY component.
cbind(
  log(ypine),
  plsRglm(log(ypine), XpineNAX21, nt = 4, modele = "pls")$YChapeau,
  plsRglm(log(ypine), XpineNAX21, nt = 4, modele = "pls-glm-gaussian")$YChapeau
)
cbind(
  log(ypine),
  plsRglm(log(ypine), XpineNAX21, nt = 4, modele = "pls")$ValsPredictY,
  plsRglm(log(ypine), XpineNAX21, nt = 4, modele = "pls-glm-gaussian")$ValsPredictY
)

# Clean up every object created by the pine examples, including the copy
# of the predictors with the injected missing value.
rm(list = c("Xpine", "ypine", "XpineNAX21"))

# fowlkes data: column 1 is used as the response, columns 2-13 as predictors.
data(fowlkes)
Xfowlkes <- fowlkes[, 2:13]
yfowlkes <- fowlkes[, 1]

# Logistic PLS glm with 4 components; pvals.expli = TRUE requests the
# individual p-values of the predictors.
modpls <- plsRglm(
  yfowlkes, Xfowlkes,
  nt = 4,
  modele = "pls-glm-logistic",
  pvals.expli = TRUE
)
modpls$pvalstep

# Clean up the objects created by this example.
rm(Xfowlkes, yfowlkes, modpls)

# aze_compl data: columns 2-34 are used as predictors, column y as response.
data(aze_compl)
Xaze_compl <- aze_compl[, 2:34]
yaze_compl <- aze_compl$y

# Information criteria of the default PLS model, with MClassed = TRUE.
plsRglm(
  yaze_compl, Xaze_compl,
  nt = 10, modele = "pls", MClassed = TRUE
)$InfCrit

# Logistic PLS glm with the same settings plus individual p-values.
modpls <- plsRglm(
  yaze_compl, Xaze_compl,
  nt = 10,
  modele = "pls-glm-logistic",
  MClassed = TRUE,
  pvals.expli = TRUE
)
modpls$InfCrit
modpls$valpvalstep
modpls$Coeffsmodel_vals

# Plots of the final model of a 4-component logistic fit.
plot(
  plsRglm(yaze_compl, Xaze_compl, nt = 4, modele = "pls-glm-logistic")$FinalModel
)

# Same analysis after dropping observations 99 and 72.
plsRglm(
  yaze_compl[-c(99, 72)], Xaze_compl[-c(99, 72), ],
  nt = 4, modele = "pls-glm-logistic", pvals.expli = TRUE
)$pvalstep
plot(
  plsRglm(
    yaze_compl[-c(99, 72)], Xaze_compl[-c(99, 72), ],
    nt = 4, modele = "pls-glm-logistic", pvals.expli = TRUE
  )$FinalModel
)

# Clean up the objects created by this example.
rm(Xaze_compl, yaze_compl, modpls)


# bordeaux data: columns 1-4 are used as predictors; Quality is turned into
# an ordered factor and used as the response of the "pls-glm-polr" model.
data(bordeaux)
Xbordeaux <- bordeaux[, 1:4]
ybordeaux <- factor(bordeaux$Quality, ordered = TRUE)
modpls <- plsRglm(ybordeaux, Xbordeaux, nt = 10, modele = "pls-glm-polr")
modpls$Coeffsmodel_vals
modpls$InfCrit

# Same fit after setting the first value of the first predictor to NA.
XbordeauxNA <- Xbordeaux
XbordeauxNA[1, 1] <- NA
modplsNA <- plsRglm(ybordeaux, XbordeauxNA, nt = 10, modele = "pls-glm-polr")
modplsNA$Coeffsmodel_vals
modplsNA$InfCrit

# Clean up every object created by this example; modpls is included so that
# the complete-data fit does not linger in the workspace.
rm(list = c("Xbordeaux", "XbordeauxNA", "ybordeaux", "modpls", "modplsNA"))


# The hyptis dataset is loaded after attaching the chemometrics package.
# install.packages("chemometrics")
library(chemometrics)
data(hyptis)

# Group is turned into an ordered factor and used as the response;
# columns 1-6 are used as predictors.
yhyptis <- factor(hyptis$Group, ordered = TRUE)
Xhyptis <- as.data.frame(hyptis[, c(1:6)])

# Set the contrasts option for the fit and keep the previous value so it can
# be restored once the example is finished.
old_opts <- options(contrasts = c("contr.treatment", "contr.poly"))
modpls2 <- plsRglm(yhyptis, Xhyptis, nt = 6, modele = "pls-glm-polr")
modpls2$Coeffsmodel_vals
modpls2$InfCrit
modpls2$Coeffs
# The component is named Std.Coeffs; `$` matching is case-sensitive, so
# the lower-case spelling would return NULL.
modpls2$Std.Coeffs

# Cross-tabulate the observed groups against the classes predicted by the
# final model.
table(yhyptis, predict(modpls2$FinalModel, type = "class"))

# Restore the previous options and clean up the objects created here.
options(old_opts)
rm(list = c("yhyptis", "Xhyptis", "modpls2", "old_opts"))


# Simulation settings passed to simul_data_UniYX().
dimX <- 6
Astar <- 4

# 250 replicated draws of simul_data_UniYX(dimX, Astar), transposed so that
# each draw becomes one row.
dataAstar4 <- t(replicate(250, simul_data_UniYX(dimX, Astar)))

# After dicho(): column 1 is the response, columns 2..(dimX + 1) the predictors.
ysimbin1 <- dicho(dataAstar4)[, 1]
Xsimbin1 <- dicho(dataAstar4)[, 2:(dimX + 1)]

# Logistic PLS glm asking for 10 components.
modplsglm <- plsRglm(ysimbin1, Xsimbin1, nt = 10, modele = "pls-glm-logistic")
modplsglm$computed_nt
modplsglm$InfCrit

# Clean up the objects created by this example.
rm(dimX, Astar, dataAstar4, ysimbin1, Xsimbin1, modplsglm)


# Simulation settings passed to simul_data_UniYX().
dimX <- 24
Astar <- 2

# 250 replicated draws of simul_data_UniYX(dimX, Astar), transposed so that
# each draw becomes one row.
dataAstar2 <- t(replicate(250, simul_data_UniYX(dimX, Astar)))

# After dicho(): column 1 is the response, columns 2..(dimX + 1) the predictors.
ysimbin1 <- dicho(dataAstar2)[, 1]
Xsimbin1 <- dicho(dataAstar2)[, 2:(dimX + 1)]

# Logistic PLS glm asking for 10 components.
modplsglm <- plsRglm(ysimbin1, Xsimbin1, nt = 10, modele = "pls-glm-logistic")
modplsglm$computed_nt
modplsglm$InfCrit

# Clean up the objects created by this example.
rm(dimX, Astar, dataAstar2, ysimbin1, Xsimbin1, modplsglm)


# Simulation settings passed to simul_data_UniYX().
dimX <- 24
Astar <- 3

# 250 replicated draws of simul_data_UniYX(dimX, Astar), transposed so that
# each draw becomes one row.
dataAstar3 <- t(replicate(250, simul_data_UniYX(dimX, Astar)))

# After dicho(): column 1 is the response, columns 2..(dimX + 1) the predictors.
ysimbin1 <- dicho(dataAstar3)[, 1]
Xsimbin1 <- dicho(dataAstar3)[, 2:(dimX + 1)]

# Logistic PLS glm asking for 10 components.
modplsglm <- plsRglm(ysimbin1, Xsimbin1, nt = 10, modele = "pls-glm-logistic")
modplsglm$computed_nt
modplsglm$InfCrit

# Clean up the objects created by this example.
rm(dimX, Astar, dataAstar3, ysimbin1, Xsimbin1, modplsglm)


# Simulation settings passed to simul_data_UniYX().
dimX <- 24
Astar <- 4

# 250 replicated draws of simul_data_UniYX(dimX, Astar), transposed so that
# each draw becomes one row.
dataAstar4 <- t(replicate(250, simul_data_UniYX(dimX, Astar)))

# After dicho(): column 1 is the response, columns 2..(dimX + 1) the predictors.
ysimbin1 <- dicho(dataAstar4)[, 1]
Xsimbin1 <- dicho(dataAstar4)[, 2:(dimX + 1)]

# Logistic PLS glm asking for 10 components.
modplsglm <- plsRglm(ysimbin1, Xsimbin1, nt = 10, modele = "pls-glm-logistic")
modplsglm$computed_nt
modplsglm$InfCrit

# Clean up the objects created by this example.
rm(dimX, Astar, dataAstar4, ysimbin1, Xsimbin1, modplsglm)


# Simulation settings passed to simul_data_UniYX().
dimX <- 24
Astar <- 5

# 250 replicated draws of simul_data_UniYX(dimX, Astar), transposed so that
# each draw becomes one row.
dataAstar5 <- t(replicate(250, simul_data_UniYX(dimX, Astar)))

# After dicho(): column 1 is the response, columns 2..(dimX + 1) the predictors.
ysimbin1 <- dicho(dataAstar5)[, 1]
Xsimbin1 <- dicho(dataAstar5)[, 2:(dimX + 1)]

# Logistic PLS glm asking for 10 components.
modplsglm <- plsRglm(ysimbin1, Xsimbin1, nt = 10, modele = "pls-glm-logistic")
modplsglm$computed_nt
modplsglm$InfCrit

# Clean up the objects created by this example.
rm(dimX, Astar, dataAstar5, ysimbin1, Xsimbin1, modplsglm)


# Simulation settings passed to simul_data_UniYX().
dimX <- 24
Astar <- 6

# 250 replicated draws of simul_data_UniYX(dimX, Astar), transposed so that
# each draw becomes one row.
dataAstar6 <- t(replicate(250, simul_data_UniYX(dimX, Astar)))

# After dicho(): column 1 is the response, columns 2..(dimX + 1) the predictors.
ysimbin1 <- dicho(dataAstar6)[, 1]
Xsimbin1 <- dicho(dataAstar6)[, 2:(dimX + 1)]

# Logistic PLS glm asking for 10 components.
modplsglm <- plsRglm(ysimbin1, Xsimbin1, nt = 10, modele = "pls-glm-logistic")
modplsglm$computed_nt
modplsglm$InfCrit

# Clean up the objects created by this example.
rm(dimX, Astar, dataAstar6, ysimbin1, Xsimbin1, modplsglm)

Run the code above in your browser using DataLab