rdir.pc: Data-driven sampling of random directions guided by sample of functional data

Description

Generation of random directions based on the principal components $\hat e_1,\ldots,\hat e_k$ of a sample of functional data $X_1,\ldots,X_n$. The random directions are sampled as $$h=\sum_{j=1}^kh_j\hat e_j,$$ with $h_j\sim\mathcal{N}(0, \sigma_j^2)$, $j=1,\ldots,k$. Useful for sampling non-orthogonal random directions $h$ such that they are non-orthogonal for the random sample.

Usage

rdir.pc(
  n,
  X.fdata,
  ncomp = 0.95,
  fdata2pc.obj = fdata2pc(X.fdata, ncomp = min(length(X.fdata$argvals), nrow(X.fdata))),
  sd = 0,
  zero.mean = TRUE,
  norm = FALSE
)

Arguments

number of curves to be generated.

X.fdata

an fdata object used to compute the functional principal components.

ncomp

if an integer vector is provided, the index for the principal components to be considered. If a threshold between 0 and 1 is given, the number of components $k$ is determined automatically as the minimum number that explains at least the ncomp proportion of the total variance of X.fdata.

fdata2pc.obj

output of fdata2pc containing as many components as the ones to be selected by ncomp. Otherwise, it is computed internally.

if 0, the standard deviations $\sigma_j$ are estimated by the standard deviations of the scores for $e_j$. If not, the $\sigma_j$'s are set to sd.

zero.mean

whether the projections should have zero mean. If not, the mean is set to the mean of X.fdata.

norm

whether the samples should be L2-normalized or not.

Value

A fdata object with the sampled directions.

Examples

Run this code

# NOT RUN {
# Simulate some data
set.seed(345673)
X.fdata <- r.ou(n = 200, mu = 0, alpha = 1, sigma = 2, t = seq(0, 1, l = 201), 
                x0 = rep(0, 200))
pc <- fdata2pc(X.fdata, ncomp = 20)

# Samples
set.seed(34567)
rdir.pc(n = 5, X.fdata = X.fdata, zero.mean = FALSE)$data[, 1:5]
set.seed(34567)
rdir.pc(n = 5, X.fdata = X.fdata, fdata2pc.obj = pc)$data[, 1:5]

# Comparison for the variance type
set.seed(456732)
n.proj <- 100
set.seed(456732)
samp1 <- rdir.pc(n = n.proj, X.fdata = X.fdata, sd = 1, norm = FALSE, ncomp = 0.99)
set.seed(456732)
samp2 <- rdir.pc(n = n.proj, X.fdata = X.fdata, sd = 0, norm = FALSE, ncomp = 0.99)
set.seed(456732)
samp3 <- rdir.pc(n = n.proj, X.fdata = X.fdata, sd = 1, norm = TRUE, ncomp = 0.99)
set.seed(456732)
samp4 <- rdir.pc(n = n.proj, X.fdata = X.fdata, sd = 0, norm = TRUE, ncomp = 0.99)
par(mfrow = c(1, 2))
plot(X.fdata, col = gray(0.85), lty = 1)
lines(samp1[1:10], col = 2, lty = 1)
lines(samp2[1:10], col = 4, lty = 1)
legend("topleft", legend = c("Data", "Different variances", "Equal variances"), 
       col = c(gray(0.85), 2, 4), lwd = 2)
plot(X.fdata, col = gray(0.85), lty = 1)
lines(samp3[1:10], col = 5, lty = 1)
lines(samp4[1:10], col = 6, lty = 1)
legend("topleft", legend = c("Data", "Different variances, normalized", 
       "Equal variances, normalized"), col = c(gray(0.85), 5:6), lwd = 2)

# Correlations (stronger with different variances and unnormalized; 
# stronger with lower ncomp)
ind <- lower.tri(matrix(nrow = n.proj, ncol = n.proj))
median(abs(cor(sapply(1:n.proj, function(i) inprod.fdata(X.fdata, samp1[i]))))[ind])
median(abs(cor(sapply(1:n.proj, function(i) inprod.fdata(X.fdata, samp2[i]))))[ind])
median(abs(cor(sapply(1:n.proj, function(i) inprod.fdata(X.fdata, samp3[i]))))[ind])
median(abs(cor(sapply(1:n.proj, function(i) inprod.fdata(X.fdata, samp4[i]))))[ind])

# Comparison for the threshold
samp1 <- rdir.pc(n = 100, X.fdata = X.fdata, ncomp = 0.25, fdata2pc.obj = pc)
samp2 <- rdir.pc(n = 100, X.fdata = X.fdata, ncomp = 0.50, fdata2pc.obj = pc)
samp3 <- rdir.pc(n = 100, X.fdata = X.fdata, ncomp = 0.90, fdata2pc.obj = pc)
samp4 <- rdir.pc(n = 100, X.fdata = X.fdata, ncomp = 0.95, fdata2pc.obj = pc)
samp5 <- rdir.pc(n = 100, X.fdata = X.fdata, ncomp = 0.99, fdata2pc.obj = pc)
cols <- rainbow(5, alpha = 0.25)
par(mfrow = c(3, 2))
plot(X.fdata, col = gray(0.75), lty = 1, main = "Data")
plot(samp1, col = cols[1], lty = 1, main = "Threshold = 0.25")
plot(samp2, col = cols[2], lty = 1, main = "Threshold = 0.50")
plot(samp3, col = cols[3], lty = 1, main = "Threshold = 0.90")
plot(samp4, col = cols[4], lty = 1, main = "Threshold = 0.95")
plot(samp5, col = cols[5], lty = 1, main = "Threshold = 0.99")

# Normalizing
samp1 <- rdir.pc(n = 100, X.fdata = X.fdata, ncomp = 0.50, fdata2pc.obj = pc,
                 norm = TRUE)
samp2 <- rdir.pc(n = 100, X.fdata = X.fdata, ncomp = 0.90, fdata2pc.obj = pc,
                 norm = TRUE)
samp3 <- rdir.pc(n = 100, X.fdata = X.fdata, ncomp = 0.95, fdata2pc.obj = pc,
                 norm = TRUE)
samp4 <- rdir.pc(n = 100, X.fdata = X.fdata, ncomp = 0.99, fdata2pc.obj = pc,
                 norm = TRUE)
samp5 <- rdir.pc(n = 100, X.fdata = X.fdata, ncomp = 0.999, fdata2pc.obj = pc,
                 norm = TRUE)
cols <- rainbow(5, alpha = 0.25)
par(mfrow = c(3, 2))
plot(X.fdata, col = gray(0.75), lty = 1, main = "Data")
plot(samp1, col = cols[1], lty = 1, main = "Threshold = 0.50")
plot(samp2, col = cols[2], lty = 1, main = "Threshold = 0.90")
plot(samp3, col = cols[3], lty = 1, main = "Threshold = 0.95")
plot(samp4, col = cols[4], lty = 1, main = "Threshold = 0.99")
plot(samp5, col = cols[5], lty = 1, main = "Threshold = 0.999")
# }

Run the code above in your browser using DataLab