SVDgen: SVD with metrics and smoothing approximation

Description

Computes the generalised Singular Value Decomposition, i.e. with non-identity metrics. A smooth approximation can be asked to constraint the components (u and v) to be smooth.

Usage

SVDgen(Y, D2 = 1, D1 = 1, smoothing = FALSE, nomb = NULL,
                  smoo = list(function(u)ksmooth( 1:length(u), u, kernel = "normal",
                           bandwidth = 3, x.points = (1:length(u)))$y))

Arguments

a matrix $n \times p$

metric in $R^p$ either a vector ($p \times 1$) or a matrix ($p \times p$)

metric in $R^n$ either a vector ($n \times 1$) or a matrix ($n \times n$)

smoothing

logical if TRUE the smoothing methods in smoo are used

nomb

numeric number of components to extract (typically when smoothing is used less components are used as the screeplot becomes flatter faster)

smoo

list of lists of smoothing functions on a vector of the approriate dimension; if on one dimension it is NA no smoothing will be done for this one; if the length of a list is one the function is used for all components. If only one list in the

Value

a PTAk object

encoding

utf-8

Details

The function computes the decomposition $X=UL^{1/2}V'$ where $U'D_1U=Id_p$ and $V'D_2V=Id_p$ and the diagonal matrix $L$ containing no zeros squared singular values. If smoothing a constraint on Least Squares solution is used, then the above decomposition becomes an approximation (unless X belongs to the space defined by the constraints). A Power Method algorithm to compute each principal tensor is used wherein Alternated Least Squares are always followed by a smoothed version of the updated vectors. If a Spline smoothing was used the algorithm would be equivalent to use the traditional penalised least squares at each iteration and could be called Penalised Power Method or Splined Alternated Least Squares Algorithm (SALSA is already an acronym used by Besse and Ferraty (1995) in where a similar idea is developped: but smoothing operates only on variables, and ismodel based as the Alternating operates on the whole approximation i.e. given the choice of the dimension reduction).

References

Leibovici D and El Maache H (1997) Une décomposition en Valeurs Singulières d'un élément d'un produit Tensoriel de k espaces de Hilbert Séparables. Compte Rendus de l'Académie des Sciences tome 325, série I, Statistiques (Statistics) & Probabilités (Probability Theory): 779-782.

Besse P and Ferraty F (1995) Curvilinear fixed effect model. Computational Statistics, 10:339-351.

Leibovici D (2008) A Simple Penalised algorithm for SVD and Multiway functional methods. (to be submitted)

Ramsay J.O. and Silverman B.W. (1997) Functional Data Analysis. Springer Series in Statistics.

Examples

Run this code

#library(stats)
 #library(tensor)

 # on smoothing

 data(longley)
 long <- as.matrix(longley[,1:7])

 long.svd <- SVDgen(long,smoothing=FALSE)
  summary.PTAk(long.svd,testvar=0)
   # X11(width=4,height=4)
  plot.PTAk(long.svd,scree=TRUE,RiskJack=0,type="b",lty=3)

 long.svdo <- SVDgen(long,smoothing=TRUE,
  smoo=list(function(u)ksmooth(1:length(u),
      u,kernel="normal",bandwidth=3,x.points=(1:length(u)))$y,NA))

  summary.PTAk(long.svdo,testvar=0)
  #  X11(width=4,height=4)
  plot.PTAk(long.svdo,scree=TRUE,RiskJack=0,type="b",lty=3)
 ###using polynomial fitting
   polyfit <- function(u,deg=length(u)/5)
       {n <- length(u);time <- rep(1,n);
        for(e in 1:deg)time<-cbind(time,(1:n)^e);return(lm.fit(time,u)$fitted.values)}
bsfit<-function(u,deg=42)
       {n <- length(u);time <- rep(1,n);
        return(lm.fit(bs(time,df=deg),u)$fitted.values)}

###
 long.svdo2 <- SVDgen(long,nomb=4,smoothing=TRUE,smoo=list(polyfit,NA))
  long.svdo2[[1]]$v[1:3,]long.svdo[[1]]$v[1:3,]# orthogonality may be lost with non-projective smoother

     ####
comtoplot <- function(com=1,solua=long.svd,solub=long.svdo,openX11s=FALSE,...)
         {
  if(openX11s)X11(width=4,height=4)
 yla <- c(round((100*(solua[[2]]$d[com])^2)/
     solua[[2]]$ssX[1],4),
     round((100*(solub[[2]]$d[com])^2)/solua[[2]]$ssX[1],4))

limi <- range(c(solua[[1]]$v[com,],solub[[1]]$v[com,]))
  plot(solua,nb1=com, mod=1,type="b",lty=3,lengthlabels=4,cex=0.4,
   ylimit=limi,ylab="",...)
mtext(paste("vs",com,":",yla[1],"%"),2,col=2,line=2)
 par(new=TRUE)

  plot.PTAk(solub,nb1=com,mod=1,labels=FALSE,type="b",lty=1,
  lengthlabels=4,cex=0.6,ylimit=limi,ylab="",main=paste("smooth vs",com,":",yla[2],"%"),...)
  par(new=FALSE)
}   ####
 comtoplot(com=1)



#  on using non-diagonal metrics

 data(crimerate)
  crimerate.mat <- sweep(crimerate,2,apply(crimerate,2,mean))
  crimerate.mat <- sweep(crimerate.mat,2,sqrt(apply(crimerate.mat,2,var)),FUN="/")
   metW <- Powmat(CauRuimet(crimerate.mat),(-1))
   # inverse of the within "group" (to play a bit more you could set m0 relating
   # the neighbourhood of states (see CauRuimet)

  cri.svd <- SVDgen(crimerate.mat,D2=1,D1=1)
  summary(cri.svd,testvar=0)
   plot(cri.svd,scree=TRUE,RiskJack=0,type="b",lty=3)
  cri.svdo <- SVDgen(crimerate.mat,D2=metW,D1=1)
   summary(cri.svdo,testvar=0)
   plot(cri.svdo,scree=TRUE,RiskJack=0,type="b",lty=3)
  # X11(width=8,height=4)
  par(mfrow=c(1,2))
   plot(cri.svd,nb1=1,nb2=2,mod=1,lengthlabels=3)
  plot(cri.svd,nb1=1,nb2=2,mod=2,lengthlabels=4,main="canonical")
  # X11(width=8,height=4)
  par(mfrow=c(1,2))
 plot(cri.svdo,nb1=1,nb2=2,mod=1,lengthlabels=3)
 plot(cri.svdo,nb1=1,nb2=2,mod=2,lengthlabels=4,
       main=expression(paste("metric ",Wg^{-1})))

###########
#  demo function
 # when ima is NULL it uses the dataset timage12 but you can put any array
 # demo.SVDgen(ima=NULL,snr=3,openX11s=TRUE)

Run the code above in your browser using DataLab