# NOT RUN {
# Simple CA (SCA) of a 5 by 4 contingency table with every SCA argument
# left at its default, which gives:
#   - 999 bootstrap replicates drawn by Poisson resampling
#   - variances for up to the first four axes
#   - the usual output for up to the first 4 axes
#   - two biplots: one with CRs for rows in principal coordinates,
#     the other with CRs for columns in principal coordinates
bd <- cabootcrs(DreamData)
# }
# NOT RUN {
# Same data set with a completely random three-category third variable added,
# analysed with MCA but with standardisations which mimic SCA as much as possible
bd3 <- cabootcrs(DreamData223by3, catype = "mca")
# Explicitly stating what the rows and columns represent,
# often needed for a contingency table
bd <- cabootcrs(
  DreamData,
  datasetname = "Maxwell's dream data",
  varnames = c("What the rows are", "What the columns are")
)
# Multiple CA (MCA) of 3 categorical variables, everything at its default:
#   - non-parametric resampling
#   - the Burt matrix is analysed
#   - one plot per variable, showing that variable in colour with its CRs
#     and the other variables in monochrome
# This is the same data set, now held as a 223 by 3 matrix whose 3rd column
# is a random variable with 3 categories.
bd3 <- cabootcrs(DreamData223by3, catype = "mca")

# SCA compared with MCA when p=2: the contingency table is converted to a
# 223 by 2 matrix first.
# The coordinates, inertias etc are the same; the standard deviations, and
# hence the ellipses, are very similar but not identical.
bd <- cabootcrs(DreamData)
DreamData223by2 <- convert(DreamData, input = "CT", output = "nbyp")$result
bdmca <- cabootcrs(DreamData223by2, catype = "mca", varandcat = FALSE)

# Inertias left unadjusted, so coordinates are not adjusted either and
# the bootstrapping uses the Burt diagonal.
# Coordinates come out larger, while inertias and ellipses come out smaller.
bdmcaunadj <- cabootcrs(
  DreamData223by2,
  catype = "mca",
  varandcat = FALSE,
  mcaadjustinertias = FALSE
)

# Standard adjustments applied to inertias and coordinates, but the
# bootstrapping still uses the Burt diagonal.
# Inertias and coordinates now match SCA, but the ellipses are smaller.
bdmcaadjbutall <- cabootcrs(
  DreamData223by2,
  catype = "mca",
  varandcat = FALSE,
  mcasupplementary = "all"
)
# Effect of sample size in SCA:
bdx4 <- cabootcrs(4 * DreamData)
bdx9 <- cabootcrs(9 * DreamData)
ba <- cabootcrs(AttachmentData)
bs <- cabootcrs(SuicideData)
bas <- cabootcrs(AsbestosData)

# Options for SCA:

# Multinomial resampling, treating the whole matrix as a single
# multinomial distribution
bdm <- cabootcrs(DreamData, resampledistn = "multinomial")

# Multinomial resampling with the row sums fixed,
# i.e. the sum of each age group is kept constant
bdmrf <- cabootcrs(
  DreamData,
  resampledistn = "multinomial",
  multinomialtype = "rowsfixed"
)

# CRs built from chi-squared critical values
bdchisq <- cabootcrs(DreamData, usebootcrits = FALSE)

# Correspondence analysis only, with no bootstrapping
bdnb0 <- cabootcrs(DreamData, nboots = 0)
# Effect of sample size in MCA:
bn <- cabootcrs(NishData, catype = "mca")

# Options for MCA

# With default settings the standard SCA and MCA results coincide when p=2;
# the bootstrap standard deviations (multinomial/nonparametric) are similar
# but not identical
bdsca <- cabootcrs(DreamData, resampledistn = "multinomial")
bdmca <- cabootcrs(
  convert(DreamData, input = "CT", output = "nbyp")$result,
  catype = "mca"
)

# Row A can be labelled A rather than R:A
# because the three variables have all different category names
bd3l <- cabootcrs(DreamData223by3, catype = "mca", varandcat = FALSE)

# Balanced resampling: each of the 223 rows occurs 999 times
# across the 999 resamples
bd3b <- cabootcrs(DreamData223by3, catype = "mca", resampledistn = "balanced")

# No adjustment of inertias, coordinates or contributions
# (coordinates are not adjusted whenever inertias are not adjusted)
bd3unadj <- cabootcrs(
  DreamData223by3,
  catype = "mca",
  mcaadjustinertias = FALSE
)
## Comparisons to ellipses from FactoMineR
# Completely random uniform categorical data are generated and ellipses are
# constructed with both packages.
# cabootcrs: the ellipses are very large and overlap extensively, as you
# would expect from completely random data.
# FactoMineR: the ellipses are much smaller, often with minimal overlaps,
# giving a completely false impression of genuine differences between
# categories.
library(FactoMineR)

# p variables, up to maxcat categories each, n observations
p <- 4
maxcat <- 5
n <- 100

# n by p matrix of uniform random category codes, each column passed
# through factor()
Xnpr <- apply(
  as.data.frame(matrix(round(runif(n * p, 0.5, maxcat + 0.5)), n, p)),
  2,
  factor
)

# FactoMineR ellipses
fr <- MCA(Xnpr, method = "Burt")
plotellipses(fr)

# cabootcrs ellipses
br <- cabootcrs(Xnpr, catype = "mca", showresults = FALSE)
plotca(br, mcacategorycolours = TRUE, showcolumnlabels = FALSE)
## Comparisons to results in ca and FactoMineR
# Summary: If using unadjusted inertias, coordinates the packages produce
# identical results, apart from differences in presentation
# (rounding off, the naming of rep/cor/cos2).
# Summary: When using adjusted inertias and coordinates (not an option in
# FactoMineR::MCA) the correlations in ca::mjca no longer sum to 1 over all
# dimensions, in cabootcrs they do.
# Ratios are the same for each dimension, but not each point,
# they are standardised differently.

# Example comparisons with random data
library(FactoMineR)
library(ca)

# p variables, up to maxcat categories each, n observations
p <- 4
maxcat <- 5
n <- 100
Xnpdf <- as.data.frame(matrix(round(runif(n * p, 0.5, maxcat + 0.5)), n, p))
Xnpr <- apply(Xnpdf, 2, factor)

# Note that ca::mjca only accepts the data as numerical,
# FactoMineR::MCA only accepts the data as characters

# Unadjusted inertias: the same Burt-matrix analysis in all three packages
rbun <- cabootcrs(Xnpr, catype = "mca", nboots = 0, mcaadjustinertias = FALSE)
rcun <- mjca(Xnpdf, lambda = "Burt")
summary(rcun)
rfm <- MCA(Xnpr, method = "Burt", graph = FALSE)
summary(rfm)

# Adjusted inertias: cabootcrs and ca::mjca with their defaults
rb <- cabootcrs(Xnpr, catype = "mca", nboots = 0)
rc <- mjca(Xnpdf)
summary(rc)

# Column correlations from each package, restricted to the first realr axes
realr <- rb@br@realr
rb@ColREP[, 1:realr]
rc$colcor[, 1:realr]

# Row sums of those correlations for each package.
# drop = FALSE keeps the subset a matrix so apply() still works
# if realr happens to be 1.
apply(rb@ColREP[, 1:realr, drop = FALSE], 1, "sum")
apply(rc$colcor[, 1:realr, drop = FALSE], 1, "sum")
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab