Learn R Programming

plsRglm (version 0.7.4)

PLS_v2: Partial least squares Regression generalized linear models

Description

This function implements partial least squares regression generalized linear models for complete or incomplete datasets.

Usage

PLS_v2(dataY, dataX, nt = 2, limQ2set = 0.0975, dataPredictY = dataX, modele = "pls", family = NULL, typeVC = "none", EstimXNA = FALSE, scaleX = TRUE, scaleY = NULL, pvals.expli = FALSE, alpha.pvals.expli = 0.05, MClassed = FALSE, tol_Xi = 10^(-12))

Arguments

dataY
response (training) dataset
dataX
predictor(s) (training) dataset
nt
number of components to be extracted
limQ2set
limit value for the Q2
dataPredictY
predictor(s) (testing) dataset
modele
name of the PLS glm model to be fitted ("pls", "pls-glm-gaussian", "pls-glm-logistic", "pls-glm-polr").
family
for the present moment the family argument is ignored and set thanks to the value of modele.
typeVC
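type of leave-one-out cross-validation. Kept for backward compatibility. Possible values:
  • "none": no cross-validation (the default)
  • "standard": as in SIMCA for datasets without missing values, and with all values predicted as those with missing values for datasets with any missing values
  • "missingdata": all values predicted as those with missing values for datasets with any missing values
  • "adaptative": predict a response value for an x with any missing value as those with missing values, and for an x without any missing value as those without missing values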
EstimXNA
only for modele="pls". Set whether the missing X values have to be estimated.
scaleX
scale the predictor(s) : must be set to TRUE for modele="pls" and should be for glms pls.
scaleY
scale the response : Yes/No. Ignored since not always possible for glm responses.
pvals.expli
should individual p-values be reported to tune model selection ?
alpha.pvals.expli
level of significance for predictors when pvals.expli=TRUE
MClassed
logical: should the number of misclassified cases be computed? Should only be used for binary responses.
tol_Xi
minimal value for Norm2(Xi) and $\mathrm{det}(pp' \times pp)$ if there is any missing value in the dataX. It defaults to $10^{-12}$

Value

  • The returned value depends on the type of model that was fitted.

Details

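There are four different models available:
  • "pls": ordinary PLS models
  • "pls-glm-gaussian": GLM Gaussian with identity link PLS models
  • "pls-glm-logistic": GLM binomial with logit link PLS models
  • "pls-glm-polr": GLM polr with logit link PLS models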

References

Nicolas Meyer, Myriam Maumy-Bertrand et Frédéric Bertrand (2010). Comparaison de la régression PLS et de la régression logistique PLS : application aux données d'allélotypage. Journal de la Société Française de Statistique, 151(2), pages 1-18. http://smf4.emath.fr/Publications/JSFdS/151_2/pdf/sfds_jsfds_151_2_1-18.pdf

See Also

PLS_v2_wvc and PLS_v2_kfoldcv

Examples

Run this code
# Cornell dataset: columns 1-7 are used as predictors, column 8 as response.
data(Cornell)
XCornell <- Cornell[, 1:7]
yCornell <- Cornell[, 8]

# Fit the 3-component PLS model once and read several components from the
# same fit, rather than refitting the identical model for each component.
modpls3 <- PLS_v2(yCornell, XCornell, nt = 3)
modpls3$uscores
modpls3$pp
modpls3$Coeffs

# Information criteria when up to 10 components are requested, for the
# default "pls" model and for the Gaussian PLS-GLM model.
PLS_v2(yCornell, XCornell, nt = 10)$InfCrit
PLS_v2(yCornell, XCornell, nt = 10, modele = "pls-glm-gaussian")$InfCrit

# Remove every object created by this example.
rm(list = c("XCornell", "yCornell", "modpls3"))


# pine dataset: columns 1-10 are used as predictors, column 11 as response.
# The response is modelled on the log scale throughout.
data(pine)
Xpine <- pine[, 1:10]
ypine <- pine[, 11]

# Each distinct model is fitted once and then reused, instead of being
# refitted for every component that is displayed.
modpls1 <- PLS_v2(log(ypine), Xpine, nt = 1)
modpls1$Std.Coeffs
modpls1$Coeffs

modpls4 <- PLS_v2(log(ypine), Xpine, nt = 4)
modpls4$Std.Coeffs
modpls4$Coeffs
modpls4$PredictY[1, ]
PLS_v2(log(ypine), Xpine, nt = 4, dataPredictY = Xpine[1, ])$PredictY[1, ]

# Same predictors with a single missing value (row 1, column 2).
XpineNAX21 <- Xpine
XpineNAX21[1, 2] <- NA
str(PLS_v2(log(ypine), XpineNAX21, nt = 2))

modplsNA4 <- PLS_v2(log(ypine), XpineNAX21, nt = 4)
modplsNA4$Std.Coeffs
modplsNA4$YChapeau[1, ]
modpls4$YChapeau[1, ]
modplsNA4$CoeffC

# EstimXNA = TRUE additionally asks for the missing X values to be estimated.
modplsNA4est <- PLS_v2(log(ypine), XpineNAX21, nt = 4, EstimXNA = TRUE)
modplsNA4est$XChapeau
modplsNA4est$XChapeauNA

# compare pls-glm-gaussian with classic plsR
# (modele = "pls" is the default, so modpls4 and modplsNA4 are the classic
# fits on the complete and on the incomplete predictors respectively)
modglm4 <- PLS_v2(log(ypine), Xpine, nt = 4, modele = "pls-glm-gaussian")
modglmNA4 <- PLS_v2(log(ypine), XpineNAX21, nt = 4,
                    modele = "pls-glm-gaussian")
cbind(modpls4$Std.Coeffs, modglm4$Std.Coeffs)

# without missing data
# log(ypine) is kept inline so that cbind() leaves the first column unnamed.
cbind(log(ypine), modpls4$YChapeau, modglm4$YChapeau)

# with missing data
cbind(log(ypine), modplsNA4$YChapeau, modglmNA4$YChapeau)
cbind(log(ypine), modplsNA4$ValsPredictY, modglmNA4$ValsPredictY)

# Remove every object created by this example, including the incomplete
# predictor set and the stored fits.
rm(list = c("Xpine", "ypine", "XpineNAX21", "modpls1", "modpls4",
            "modplsNA4", "modplsNA4est", "modglm4", "modglmNA4"))


# fowlkes dataset: column 1 is used as response, columns 2-13 as predictors.
data(fowlkes)
Xfowlkes <- fowlkes[, 2:13]
yfowlkes <- fowlkes[, 1]

# Logistic PLS-GLM with 4 components; pvals.expli = TRUE requests the
# individual p-values of the predictors.
modpls <- PLS_v2(
  dataY = yfowlkes,
  dataX = Xfowlkes,
  nt = 4,
  modele = "pls-glm-logistic",
  pvals.expli = TRUE
)
modpls$pvalstep

# Remove every object created by this example.
rm(Xfowlkes, yfowlkes, modpls)


# aze_compl dataset: columns 2-34 are used as predictors, column `y` as
# response.
data(aze_compl)
Xaze_compl <- aze_compl[, 2:34]
yaze_compl <- aze_compl$y

# Ordinary PLS with the misclassification option switched on.
PLS_v2(yaze_compl, Xaze_compl, nt = 10, modele = "pls",
       MClassed = TRUE)$InfCrit

# Logistic PLS-GLM on the full data, with individual p-values requested.
modpls <- PLS_v2(yaze_compl, Xaze_compl, nt = 10,
                 modele = "pls-glm-logistic", MClassed = TRUE,
                 pvals.expli = TRUE)
modpls$InfCrit
modpls$valpvalstep
modpls$Coeffsmodel_vals

plot(PLS_v2(yaze_compl, Xaze_compl, nt = 4,
            modele = "pls-glm-logistic")$FinalModel)

# Refit without observations 99 and 72. The fit is stored once and reused for
# both the p-values and the plot instead of being computed twice.
modpls_sub <- PLS_v2(yaze_compl[-c(99, 72)], Xaze_compl[-c(99, 72), ],
                     nt = 4, modele = "pls-glm-logistic",
                     pvals.expli = TRUE)
modpls_sub$pvalstep
plot(modpls_sub$FinalModel)

# Remove every object created by this example.
rm(list = c("Xaze_compl", "yaze_compl", "modpls", "modpls_sub"))


# bordeaux dataset: columns 1-4 are used as predictors; the `Quality` column,
# converted to an ordered factor, is the response.
data(bordeaux)
Xbordeaux <- bordeaux[, 1:4]
ybordeaux <- factor(bordeaux$Quality, ordered = TRUE)

# PLS-GLM with the "pls-glm-polr" model on the complete predictors.
modpls <- PLS_v2(ybordeaux, Xbordeaux, nt = 10, modele = "pls-glm-polr")
modpls$Coeffsmodel_vals
modpls$InfCrit

# Same model after setting one predictor value (row 1, column 1) to missing.
XbordeauxNA <- Xbordeaux
XbordeauxNA[1, 1] <- NA
modplsNA <- PLS_v2(ybordeaux, XbordeauxNA, nt = 10, modele = "pls-glm-polr")
modplsNA$Coeffsmodel_vals
modplsNA$InfCrit

# Remove every object created by this example, including both fitted models.
rm(list = c("Xbordeaux", "XbordeauxNA", "ybordeaux", "modpls", "modplsNA"))


# Logistic PLS-GLM on simulated data, for several (dimX, Astar) settings.
# The settings are run in a fixed order so that the sequence of calls to
# simul_data_UniYX(), and hence the use of the random number stream, is
# unchanged from one run of the examples to the next.
simul_settings <- list(
  c(dimX = 6, Astar = 4),
  c(dimX = 24, Astar = 2),
  c(dimX = 24, Astar = 3),
  c(dimX = 24, Astar = 4),
  c(dimX = 24, Astar = 5),
  c(dimX = 24, Astar = 6)
)

for (setting in simul_settings) {
  dimX <- setting[["dimX"]]
  Astar <- setting[["Astar"]]

  # 250 simulated rows; after transposition each row is one observation.
  dataAstar <- t(replicate(250, simul_data_UniYX(dimX, Astar)))

  # Dichotomise once and split into response (column 1) and predictors
  # (columns 2 to dimX + 1).
  # NOTE(review): assumes dicho() is deterministic, so that calling it once
  # instead of twice does not change the results -- confirm.
  databin <- dicho(dataAstar)
  ysimbin1 <- databin[, 1]
  Xsimbin1 <- databin[, 2:(dimX + 1)]

  modplsglm <- PLS_v2(ysimbin1, Xsimbin1, nt = 10,
                      modele = "pls-glm-logistic")

  # Values are not auto-printed inside a loop, hence the explicit print().
  print(modplsglm$computed_nt)
  print(modplsglm$InfCrit)
}

# Remove every object created by this example.
rm(list = c("simul_settings", "setting", "dimX", "Astar", "dataAstar",
            "databin", "ysimbin1", "Xsimbin1", "modplsglm"))

Run the code above in your browser using DataLab