squarem: Squared Extrapolation Methods for Accelerating Slowly-Convergent Fixed-Point Iterations

Description

Globally-convergent, partially monotone, acceleration schemes for accelerating the convergence of any smooth, monotone, slowly-converging contraction mapping. It can be used to accelerate the convergence of a wide variety of iterations including the expectation-maximization (EM) algorithms and its variants, majorization-minimization (MM) algorithm, power method for dominant eigenvalue-eigenvector, Google's page-rank algorithm, and multi-dimensional scaling.

Usage

squarem(par, fixptfn, objfn, ... , control=list())

Arguments

Value

A list with the following components:parParameter, $x*$ that are the fixed-point of $F$ such that $x* = F(x*)$, if convergence is successful.value.objfnThe value of the objective function $L$ at termination.fpevalsNumber of times the fixed-point function fixptfn was evaluated.objfevalsNumber of times the objective function objfn was evaluated.convergenceAn integer code indicating type of convergence. 0 indicates successful convergence, whereas 1 denotes failure to converge.

Details

The function squarem is a general-purpose algorithm for accelerating the convergence of any slowly-convergent (smooth) fixed-point iteration. Full names of Default values of control are: K=1, method=3, square=TRUE, step.min0=1, step.max0=1, mstep=4, objfn.inc=1, kr=1, tol=1e-07, maxiter=1500, trace=FALSE. [object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]

References

R Varadhan and C Roland (2008), Simple and globally convergent numerical schemes for accelerating the convergence of any EM algorithm, Scandinavian Journal of Statistics, 35:335-353. C Roland, R Varadhan, and CE Frangakis (2007), Squared polynomial extrapolation methods with cycling: an application to the positron emission tomography problem, Numerical Algorithms, 44:159-172.

Examples

Run this code

###########################################################################
# Also see the vignette by typing:
#  vignette("SQUAREM", all=FALSE)
#
# Example 1:  EM algorithm for Poisson mixture estimation 

poissmix.em <- function(p,y) {
# The fixed point mapping giving a single E and M step of the EM algorithm
# 
pnew <- rep(NA,3)
i <- 0:(length(y)-1)
zi <- p[1]*exp(-p[2])*p[2]^i / (p[1]*exp(-p[2])*p[2]^i + (1 - p[1])*exp(-p[3])*p[3]^i)
pnew[1] <- sum(y*zi)/sum(y)
pnew[2] <- sum(y*i*zi)/sum(y*zi)
pnew[3] <- sum(y*i*(1-zi))/sum(y*(1-zi))
p <- pnew
return(pnew)
}

poissmix.loglik <- function(p,y) {
# Objective function whose local minimum is a fixed point 
# negative log-likelihood of binary poisson mixture
i <- 0:(length(y)-1)
loglik <- y*log(p[1]*exp(-p[2])*p[2]^i/exp(lgamma(i+1)) + 
		(1 - p[1])*exp(-p[3])*p[3]^i/exp(lgamma(i+1)))
return ( -sum(loglik) )
}

# Real data from Hasselblad (JASA 1969)
poissmix.dat <- data.frame(death=0:9, freq=c(162,267,271,185,111,61,27,8,3,1))
y <- poissmix.dat$freq
tol <- 1.e-08

# Use a preset seed so the example is reproducable. 
require("setRNG")
old.seed <- setRNG(list(kind="Mersenne-Twister", normal.kind="Inversion",
    seed=54321))

p0 <- c(runif(1),runif(2,0,4))  # random starting value

# Basic EM algorithm
pf1 <- fpiter(p=p0, y=y, fixptfn=poissmix.em, objfn=poissmix.loglik, control=list(tol=tol))

# First-order SQUAREM algorithm with SqS3 method
pf2 <- squarem(par=p0, y=y, fixptfn=poissmix.em, objfn=poissmix.loglik, 
               control=list(tol=tol))

# First-order SQUAREM algorithm with SqS2 method
pf3 <- squarem(par=p0, y=y, fixptfn=poissmix.em, objfn=poissmix.loglik, 
               control=list(method=2, tol=tol))

# First-order SQUAREM algorithm with SqS3 method; non-monotone 
# Note: the objective function is not evaluated when objfn.inc = Inf 
pf4 <- squarem(par=p0,y=y, fixptfn=poissmix.em,
               control=list(tol=tol, objfn.inc=Inf))

# First-order SQUAREM algorithm with SqS3 method; 
#  objective function is not specified
pf5 <- squarem(par=p0,y=y, fixptfn=poissmix.em, control=list(tol=tol, kr=0.1))

# Second-order (K=2) SQUAREM algorithm with SqRRE 
pf6 <- squarem(par=p0, y=y, fixptfn=poissmix.em, objfn=poissmix.loglik, 
               control=list (K=2, tol=tol))

# Second-order SQUAREM algorithm with SqRRE; objective function is not specified
pf7 <- squarem(par=p0, y=y, fixptfn=poissmix.em, control=list(K=2, tol=tol))

# Comparison of converged parameter estimates
par.mat <- rbind(pf1$par, pf2$par, pf3$par, pf4$par, pf5$par, pf6$par, pf7$par)
par.mat

# Compare objective function values 
# (note: `NA's indicate that \code{objfn} was not specified)
c(pf1$value, pf2$value, pf3$value, pf4$value, 
  pf5$value, pf6$value, pf7$value)

# Compare number of fixed-point evaluations
c(pf1$fpeval, pf2$fpeval, pf3$fpeval, pf4$fpeval, 
  pf5$fpeval, pf6$fpeval, pf7$fpeval)

# Compare mumber of objective function evaluations 
# (note: `0' indicate that \code{objfn} was not specified)
c(pf1$objfeval, pf2$objfeval, pf3$objfeval, pf4$objfeval, 
  pf5$objfeval, pf6$objfeval, pf7$objfeval)


##############################################################################
# Example 2:  Accelerating the convergence of power method iteration 
#  for finding the dominant eigenvector of a matrix 

power.method <- function(x, A) {
# Defines one iteration of the power method
# x = starting guess for dominant eigenvector
# A = a square matrix
ax <- as.numeric(A %*% x)
f <- ax / sqrt(as.numeric(crossprod(ax)))
f
}

# Finding the dominant eigenvector of the Bodewig matrix
b <- c(2, 1, 3, 4, 1,  -3,   1,   5,  3,   1,   6,  -2,  4,   5,  -2,  -1)
bodewig.mat <- matrix(b,4,4)
eigen(bodewig.mat)

p0 <- rnorm(4)

# Standard power method iteration
ans1 <- fpiter(p0, fixptfn=power.method, A=bodewig.mat)
# re-scaling the eigenvector so that it has unit length
ans1$par <- ans1$par / sqrt(sum(ans1$par^2))  
ans1

# First-order SQUAREM with default settings 
ans2 <- squarem(p0, fixptfn=power.method, A=bodewig.mat, control=list(K=1))
ans2$par <- ans2$par / sqrt(sum(ans2$par^2))
ans2

# First-order SQUAREM with a smaller step.min0
# Convergence is dramatically faster now!
ans3 <- squarem(p0, fixptfn=power.method, A=bodewig.mat, control=list(step.min0 = 0.5))
ans3$par <- ans3$par / sqrt(sum(ans3$par^2))
ans3

# Second-order SQUAREM
ans4 <- squarem(p0, fixptfn=power.method, A=bodewig.mat, control=list(K=2, method="rre"))
ans4$par <- ans4$par / sqrt(sum(ans4$par^2))
ans4

Run the code above in your browser using DataLab