## Not run:
#
#
#
# ## voting statistics at different levels
# ############################################################
#
# # load the cookies dataset:
# data(cookie)
# cookie$freq
# cookie$cookies
#
#
# # performing the spectral analysis
# (out <- spectral(cookie$freq, 6, 3, cookie$cookies))
#
#
# out$obs # the original observations, and the summary statistics
#
# out$exp # each level is conditional on the previous level's statistics
# # (e.g. what you would expect for 1st order effects given sample size)
# # these are generated using 10k markov bases based mcmc samples
#
# out$p.value # these are approximate exact test p-values using various
# # popular test statistics. the approximations are good to
# # monte carlo error
#
# out$p.value.se # these are the standard errors using the sqrt(p*(1-p)/n)
# # asymptotic formula, known to have poor performance
# # for small/large p; see package binom for better
#
# out$statistic # the individual statistics are also available
# # the values are not comprable across Vi levels (the rows)
# # as they have asymptotic chi-squared distributions with
# # different degrees of freedom
#
# out$fullExp # you can also get the expected number of samples at each scale
# # for tables with the same ith order statistics, i = 0, ..., k-1
#
#
# # these can be seen to (re)construct an expected picture of the
# # complete data given each successive collection of statistics
# cbind(
# obs = cookie$freq,
# as.data.frame(lapply(out$fullExp, function(x) round(x[[4]],1)))
# )[c(2:4,1)]
# # notice that the reconstruction given only the first order statistics
# # (the number of individual cookies selected) is quite good
#
#
#
# # instead of using the reconstructions from the exp coming from
# # the samples, you could reconstruct the summaries of the observed
# # data using bump; it's not quite as good :
# V0 <- bump(cookie$freq, 6, 3, 3, 0)
# V1 <- bump(cookie$freq, 6, 3, 3, 1)
# V2 <- bump(cookie$freq, 6, 3, 3, 2)
#
# cbind(
# obs = cookie$freq,
# round(data.frame(
# V0 = bump(V0, 6, 3, 0, 3),
# V1 = bump(V1, 6, 3, 1, 3),
# V2 = bump(V2, 6, 3, 2, 3)
# ), 2)
# )[c(2:4,1)]
#
#
#
#
# # you can see the model step-by-step with showStages() :
# out$showStages()
# # notice (1) the significant reduction in the residuals after conditioning
# # on the first order statistics and also (2) the powdery noise after
# # conditioning on the second order statistics.
# # the p-values reflect the same:
# # * the residuals from conditioning on the sample size show the first
# # order effects are strongly significant (in out$p.value V1 = 0)
# # * the residuals from conditioning on the first order effects suggest
# # the second order effects might be significant (V2 ~ .04-.13ish)
# # * the residuals from conditioning on the second order effects indicate
# # the third order effects are entirely insignificant (V3 > .2)
#
#
# # the isotypic subpaces can be used to determine the pure order effects :
#
# out$isotypicBases # bases of the isotypic subspaces (here 4)
#
# out$effects # pure ith order effects; cookie$freq projected onto the bases
# # these are their effects at the data level, so they all have
# # the same length as the original dataset: choose(n, k)
#
# zapsmall(rowSums(out$effects)) # the effects sum to the data
#
#
# # if the Vk effects are 0, then the conclusion is that Vk is perfectly
# # predicted with the (k-1)st level statistics. this may lead to the
# # conclusion that the l2 norms (say) of the effects might be used to
# # gauge the relative strength of effects :
# out$effectsNorms # = apply(out$effects, 2, lpnorm)
#
#
# # the natural (not full-dimensional) residuals can be seen with the summary
# out
# # or with
# out$residuals
# # these are the residuals (obs ith level stats) - (exp ith level stats)
# # given the (i-1)st statistics
#
#
#
#
#
#
#
#
#
#
#
# # bump is a useful function :
# out$obs
# bump(cookie$freq, 6, 3, 3, 0) # the 0 level is the number of voters, not votes
# bump(cookie$freq, 6, 3, 3, 1)
# bump(cookie$freq, 6, 3, 3, 2)
# bump(cookie$freq, 6, 3, 3, 3)
#
# V1 <- out$obs$V1 # = bump(cookie$freq, 6, 3, 3, 1)
# bump(V1, 6, 3, 1, 0)
# bump(V1, 6, 3, 1, 1)
# bump(V1, 6, 3, 1, 2) # cbind(bump(V1, 6, 3, 1, 2), out$exp$V2)
# bump(V1, 6, 3, 1, 3) # cbind(bump(V1, 6, 3, 1, 3), out$fullExp$V1[[4]])
# # the differences here are between an observation and an expectation
#
#
#
#
#
#
#
#
#
#
# out$obs$V1 - out$exp$V1
# out$residuals$V1
# out$decompose(out$effects$V1)$V1
#
# out$obs$V2 - out$exp$V2
# out$residuals$V2
#
# out$decompose(out$effects$V0)$V2 +
# out$decompose(out$effects$V1)$V2 +
# out$decompose(out$effects$V2)$V2 -
# out$exp$V2
#
#
#
#
#
# # this is how to reconstruct the observation given the effects
# # the cols of out$effects are the Vk order effects reconstructed
# # from the lower level effects
# out$obs$V0
# zapsmall(
# out$decompose(out$effects$V0)$V0
# )
#
# out$obs$V1
# zapsmall(
# out$decompose(out$effects$V0)$V1 +
# out$decompose(out$effects$V1)$V1
# )
#
# out$obs$V2
# zapsmall(
# out$decompose(out$effects$V0)$V2 +
# out$decompose(out$effects$V1)$V2 +
# out$decompose(out$effects$V2)$V2
# )
#
# out$obs$V3
# zapsmall(
# out$decompose(out$effects$V0)$V3 +
# out$decompose(out$effects$V1)$V3 +
# out$decompose(out$effects$V2)$V3 +
# out$decompose(out$effects$V3)$V3
# )
# zapsmall(rowSums(out$effects))
#
# all(cookie$freq == zapsmall(rowSums(out$effects)))
#
#
#
# out$effects$V0
# out$effects$V0 + out$effects$V1
# out$effects$V0 + out$effects$V2
# out$effects$V0 + out$effects$V3
#
#
#
# str(out$sampsDecomposed)
# as.data.frame(lapply(out$sampsDecomposed, function(l) rowMeans(l$V3)))
#
# eff0 <- rowMeans(out$sampsDecomposed$V0$V3)
# cbind(eff0, out$effects$V0)
#
# eff1 <- rowMeans(out$sampsDecomposed$V1$V3 - eff0)
# cbind(eff1, out$effects$V1)
#
# eff2 <- rowMeans(out$sampsDecomposed$V2$V3 - eff0 - eff1)
# cbind(eff2, out$effects$V2)
#
# sum(eff0)
# sum(eff1)
# sum(eff2)
#
#
#
#
#
#
#
# str(out$sampsEffectsNorms)
#
# data <- out$sampsEffectsNorms$V0$V3
# plot(density(data))
# curve(dnorm(x, mean(data), sd(data)), col = "red", add = TRUE)
#
# data <- out$sampsEffectsNorms$V0$V2
# plot(density(data))
# curve(dnorm(x, mean(data), sd(data)), col = "red", add = TRUE)
#
# data <- out$sampsEffectsNorms$V0$V1
# plot(density(data))
# curve(dnorm(x, mean(data), sd(data)), col = "red", add = TRUE)
#
#
# data <- out$sampsEffectsNorms$V1$V3
# plot(density(data))
# curve(dnorm(x, mean(data), sd(data)), col = "red", add = TRUE)
#
# data <- out$sampsEffectsNorms$V1$V2
# plot(density(data))
# curve(dnorm(x, mean(data), sd(data)), col = "red", add = TRUE)
#
#
# data <- out$sampsEffectsNorms$V2$V3
# plot(density(data))
# curve(dnorm(x, mean(data), sd(data)), col = "red", add = TRUE)
#
#
#
#
#
#
# ## how to convert data into the right format
# ############################################################
# # this essentially just uses some clever indexing tricks
# # to reorder the data in the way you want
#
# data <- cookie$raw # an example raw, unordered dataset
# levels <- cookie$cookies # the order of the objects you want
# levsNndcs <- 1:length(levels)
# names(levsNndcs) <- levels
#
#
# # arrange selections within rows (order of selection doesn't matter)
# data <- t(apply(data, 1, function(x) x[order(levsNndcs[x])] ))
#
#
# # arrange rows (order of selectors doesn't matter)
# for(k in ncol(data):1) data <- data[order(levsNndcs[data[,k]]),]
#
#
# # check that you've done the right thing
# all( data == cookie$sorted )
#
# # the data frequency order should match that of subsets:
# subsets(levels, 1)
#
# subsets(levels, 2)
# sapply(subsets(levels, 2), paste, collapse = ", ")
#
# subsets(levels, 3)
# sapply(subsets(levels, 3), paste, collapse = ", ")
#
# names(cookie$freq)
# names(cookie$freq) == sapply(subsets(levels, 3), paste, collapse = ", ")
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
# ## other examples
# ############################################################
#
# # rvotes provides uniform samples
#
# n <- 4
# k <- 2
#
# raw <- rvotes(250, n, k)
# rawTogether <- apply(raw, 1, paste, collapse = " ")
# levels <- sapply(subsets(n, k), paste, collapse = " ")
# freq <- table( factor(rawTogether, levels = levels) )
# (out <- spectral(freq, n, k))
#
# out$p.value
# out$showStages()
#
# out$obs
# out$exp
#
#
#
#
#
# n <- 6
# k <- 3
# raw <- rvotes(250, n, k)
# rawTogether <- apply(raw, 1, paste, collapse = " ")
# levels <- sapply(subsets(n, k), paste, collapse = " ")
# freq <- table( factor(rawTogether, levels = levels) )
# (out <- spectral(freq, n, k))
#
#
# n <- 7
# k <- 3
# raw <- rvotes(250, n, k)
# rawTogether <- apply(raw, 1, paste, collapse = " ")
# levels <- sapply(subsets(n, k), paste, collapse = " ")
# freq <- table( factor(rawTogether, levels = levels) )
# (out <- spectral(freq, n, k))
#
#
#
# n <- 8
# k <- 3
# raw <- rvotes(250, n, k)
# rawTogether <- apply(raw, 1, paste, collapse = " ")
# levels <- sapply(subsets(n, k), paste, collapse = " ")
# freq <- table( factor(rawTogether, levels = levels) )
# # out <- spectral(freq, n, k) # breaks
#
#
#
#
# ## End(Not run)
Run the code above in your browser using DataLab