# Ideal crisp-set data from Baumgartner (2009) on education levels in western democracies
# ----------------------------------------------------------------------------------------
# Run an exhaustive CNA that places no constraints on the search space, then
# print the default output (atomic and complex solution formulas).
cna.educate <- cna(d.educate)
cna.educate

# Two complex solution formulas result: one represents a common cause structure,
# the other a causal chain. The common cause structure is graphically depicted
# in (Note, figure (a)), the causal chain in (Note, figure (b)).

# Build solutions with evaluation measures other than the standard ones.
cna(d.educate, measures = c("ccon", "ccov"))
cna(d.educate, measures = c("PAcon", "PACcov"))

# CNA with negations of the factors E and L.
cna(d.educate, notcols = c("E", "L"))

# The same, this time requested through the outcome argument.
cna(d.educate, outcome = c("e", "l"))

# CNA with negations of all factors.
cna(d.educate, notcols = "all")

# Print msc, asf, and csf with additional evaluation measures and solution attributes.
cna(
  d.educate,
  what = "mac",
  details = c("ccon", "ccov", "PAcon", "PACcov", "exhaustive")
)
cna(d.educate, what = "mac", details = c("e", "f", "AACcon", "AAcov"))
cna(d.educate, what = "mac", details = TRUE)

# Print solutions without spaces before and after "+".
# options() invisibly returns the settings it replaces; they are kept in
# old.spaces so that the caller's own configuration can be reinstated afterwards.
old.spaces <- options(spaces = c("<->", "->"))
cna(d.educate, details = c("e", "f"))

# Print solutions with spaces before and after "*".
options(spaces = c("<->", "->", "*"))
cna(d.educate, details = c("e", "f", "PAcon", "PACcov"))

# Reinstate the value the option "spaces" had before this example ran, instead
# of overwriting it with a hard-coded vector. (This example previously reset it
# to c("<->", "->", "+"), which it described as the default.)
options(old.spaces)
# Crisp-set data from Krook (2010) on representation of women in western-democratic
# parliaments
# -----------------------------------------------------------------------------------
# This example shows that CNA can distinguish exogenous and endogenous factors in the
# data. Without being told which factor is the outcome, CNA reproduces the original
# QCA of Krook (2010).
ana1 <- cna(d.women, measures = c("PAcon", "PACcov"), details = c("e", "f"))
ana1

# The two resulting asf only reach an exhaustiveness score of 0.438, meaning that
# not all configurations that are compatible with the asf are contained in the data
# "d.women". Here is how to extract the configurations that are compatible with
# the first asf but are not contained in "d.women".
#
# dplyr's setdiff() is called through its namespace rather than after
# library(dplyr): attaching the whole package in the middle of the script would
# mask other functions for every later example, and only this one function
# is needed here.
dplyr::setdiff(
  ct2df(selectCases(asf(ana1)$condition[1], full.ct(d.women))),
  d.women
)
# Highly ambiguous crisp-set data from Wollebaek (2010) on very high volatility of
# grassroots associations in Norway
# --------------------------------------------------------------------------------
# csCNA using the ordering from Wollebaek (2010).
# [Beware: because of massive ambiguities, this analysis needs about 20 seconds.]
cna(
  d.volatile,
  ordering = "VO2",
  maxstep = c(6, 6, 16)
)

# With suff.only, CNA can be forced to abandon the analysis once the sufficient
# conditions have been minimized. [This analysis terminates quickly.]
cna(
  d.volatile,
  ordering = "VO2",
  maxstep = c(6, 6, 16),
  suff.only = TRUE
)

# In a similar way, leaving maxstep at its default forces CNA to search only for
# asf and csf with reduced complexity.
cna(d.volatile, ordering = "VO2")

# ordering = "VO2" merely excludes that the values of VO2 are causes of the values
# of the other factors in d.volatile; cna() still tries to model other factor
# values as outcomes. The call below determines that only VO2 is a possible
# outcome. (This call terminates quickly.)
cna(d.volatile, outcome = "VO2")

# maxstep can even be increased.
cna(
  d.volatile,
  outcome = "VO2",
  maxstep = c(4, 4, 16)
)

# If it is known that, say, el and od cannot be causes of VO2, this can be excluded.
cna(
  d.volatile,
  outcome = "VO2",
  maxstep = c(4, 4, 16),
  exclude = "el, od -> VO2"
)

# The verbose argument returns information during the execution of cna().
cna(d.volatile, ordering = "VO2", verbose = TRUE)
# Multi-value data from Hartmann & Kemmerzell (2010) on party bans in Africa
# ---------------------------------------------------------------------------
# mvCNA with the outcome specification of Hartmann & Kemmerzell (2010); standard
# coverage threshold at 0.95 (standard consistency threshold at 1), maxstep at
# c(6, 6, 10).
cna.pban <- cna(
  d.pban,
  outcome = "PB=1",
  cov = .95,
  maxstep = c(6, 6, 10),
  what = "all"
)
cna.pban

# The call above yields 14 asf and csf in total, of which the default output
# prints only 5. This is how to extract all 14 asf and csf.
asf(cna.pban)
csf(cna.pban)

# [Note that all of these 14 causal models reach better consistency and
# coverage scores than the one model Hartmann & Kemmerzell (2010) present in their
# paper, which they generated using the TOSMANA software, version 1.3.
# T=0 + T=1 + C=2 + T=1*V=0 + T=2*V=0 <-> PB=1]
condTbl("T=0 + T=1 + C=2 + T=1*V=0 + T=2*V=0 <-> PB = 1", d.pban)

# Extract all minimally sufficient conditions together with further details.
msc(cna.pban, details = c("ccon", "ccov", "PAcon", "PACcov"))

# Alternatively, all msc, asf, and csf can be recovered through the nsolutions
# argument of the print function, which also allows for adding details.
print(
  cna.pban,
  nsolutions = "all",
  details = c("AACcon", "AAcov", "ex", "fa")
)

# Print the configuration table including the "cases" column.
print(cna.pban, what = "t", show.cases = TRUE)

# Build solution formulas with at most 4 disjuncts.
cna(d.pban, outcome = "PB=1", cov = .95, maxstep = c(4, 4, 10))

# Use non-standard evaluation measures for solution building.
cna(d.pban, outcome = "PB=1", cov = .95, measures = c("PAcon", "PACcov"))

# Print only 2 digits of the standard consistency and coverage scores.
print(cna.pban, digits = 2)

# Build everything, but print only two msc for each factor and two asf and csf.
print(
  cna(d.pban, outcome = "PB=1", cov = .95,
      maxstep = c(6, 6, 10), what = "all"),
  nsolutions = 2
)

# Lowering the thresholds on standard consistency and coverage yields further
# models with excellent fit scores; print only asf.
cna(d.pban, outcome = "PB=1", con = .93, what = "a", maxstep = c(6, 6, 10))

# Lowering both standard consistency and coverage.
cna(d.pban, outcome = "PB=1", con = .9, cov = .9, maxstep = c(6, 6, 10))

# Lowering both standard consistency and coverage, and excluding F=0 as a
# potential cause of PB=1.
cna(
  d.pban,
  outcome = "PB=1",
  con = .9,
  cov = .9,
  maxstep = c(6, 6, 10),
  exclude = "F=0 -> PB=1"
)

# Specifying an outcome is unnecessary for d.pban. PB=1 is the only
# factor value in those data that could possibly be an outcome.
cna(d.pban, con = .9, cov = .9, maxstep = c(6, 6, 10))
# Fuzzy-set data from Basurto (2013) on autonomy of biodiversity institutions in Costa Rica
# ---------------------------------------------------------------------------------------
# Basurto investigates two outcomes: emergence of local autonomy and endurance thereof.
# Rows 1-14 of d.autonomy hold the data for the first outcome, rows 15-30 the data for
# the second. For each outcome, the author distinguishes between local ("EM", "SP",
# "CO"), national ("CI", "PO") and international ("RE", "CN", "DE") conditions. We first
# apply fsCNA to replicate the analysis for the local conditions of the endurance of
# local autonomy.
dat1 <- d.autonomy[15:30, c("AU", "EM", "SP", "CO")]
cna(
  dat1,
  ordering = "AU",
  strict = TRUE,
  con = .9,
  cov = .9
)

# The CNA model has significantly better consistency (and equal coverage) scores than
# the model presented by Basurto (p. 580): SP*EM + CO <-> AU, which he generated using
# the fs/QCA software.
# Both EM and CO are redundant to account for AU.
condition("SP*EM + CO <-> AU", dat1)

# Allowing for dependencies among the conditions (strict = FALSE), CNA reveals that SP
# is a common cause of both AU and EM.
cna(
  dat1,
  ordering = "AU",
  strict = FALSE,
  con = .9,
  cov = .9
)

# Two analyses at different con/cov thresholds for the international conditions of
# autonomy endurance.
dat2 <- d.autonomy[15:30, c("AU", "RE", "CN", "DE")]
cna(dat2, ordering = "AU", con = .9, cov = .85)
cna(dat2, ordering = "AU", con = .85, cov = .9, details = TRUE)

# Two analyses of the whole dataset using different evaluation measures.
# They show that across the whole period 1986-2006, the best causal model of local
# autonomy (AU) renders that outcome dependent only on local direct spending (SP).
cna(
  d.autonomy,
  outcome = "AU",
  con = .85,
  cov = .9,
  maxstep = c(5, 5, 11),
  details = TRUE
)
cna(
  d.autonomy,
  outcome = "AU",
  measures = c("AACcon", "AAcov"),
  con = .85,
  cov = .9,
  maxstep = c(5, 5, 11),
  details = TRUE
)
# High-dimensional data
# ---------------------
# An analysis of the data d.highdim, which has 50 factors, massive fragmentation,
# and 20% noise. (Takes about 15 seconds to compute.)
head(d.highdim)
cna(
  d.highdim,
  outcome = c("V13", "V11"),
  con = .8,
  cov = .8
)

# Lowering maxstep brings the computation time down to less than 1 second
# (at the cost of an incomplete solution).
cna(
  d.highdim,
  outcome = c("V13", "V11"),
  con = .8,
  cov = .8,
  maxstep = c(2, 3, 10)
)
# Highly ambiguous artificial data to illustrate exhaustiveness and acyclic.only
# ------------------------------------------------------------------------------
mycond <- "(D + C*f <-> A)*(C*d + c*D <-> B)*(B*d + D*f <-> C)*(c*B + B*f <-> E)"
dat1 <- selectCases(mycond)
ana1 <- cna(dat1, details = c("e", "cy"))

# Almost 2M csf exist. The following builds the first 927 of them and emits
# additional messages about the csf building process.
first.csf <- csf(ana1, verbose = TRUE)
first.csf

# Most of these csf are compatible with more configurations than dat1 contains.
# Only 141 csf in first.csf are perfectly exhaustive (i.e. all compatible
# configurations are contained in dat1).
subset(first.csf, exhaustiveness == 1)

# All of the csf in first.csf contain cyclic substructures.
subset(first.csf, cyclic == TRUE)

# This is how to build acyclic csf.
ana2 <- cna(
  dat1,
  details = c("e", "cy"),
  acyclic.only = TRUE
)
csf(ana2, verbose = TRUE)
# Run the code above in your browser using DataLab