# Ideal crisp-set data from Baumgartner (2009a) on education levels in western democracies
# ----------------------------------------------------------------------------------------
# Exhaustive CNA without constraints on the search space; print atomic and complex
# solution formulas (default output).
cna.educate <- cna(d.educate)
cna.educate
# The two resulting complex solution formulas represent a common cause structure
# and a causal chain, respectively. The common cause structure is graphically depicted
# in (Note, figure (a)), the causal chain in (Note, figure (b)).
# Print only complex solution formulas.
print(cna.educate, what = "c")
# Print only atomic solution formulas.
print(cna.educate, what = "a")
# Print only minimally sufficient conditions.
print(cna.educate, what = "m")
# Print only the configuration table.
print(cna.educate, what = "t")
# CNA with negations of the factors E and L.
cna(d.educate, notcols = c("E","L"))
# The same by use of the outcome argument.
cna(d.educate, outcome = c("e","l"))
# CNA with negations of all factors.
cna(d.educate, notcols = "all")
# Print msc, asf, and csf with all solution attributes.
cna(d.educate, what = "mac", details = TRUE)
# Add only the non-standard solution attributes "exhaustiveness" and "faithfulness".
cna(d.educate, details = c("e", "f"))
# Print solutions without spaces before and after "+".
options(spaces = c("<->", "->" ))
cna(d.educate, details = c("e", "f"))
# Print solutions with spaces before and after "*".
options(spaces = c("<->", "->", "*" ))
cna(d.educate, details = c("e", "f"))
# Restore the default of the option "spaces".
options(spaces = c("<->", "->", "+"))
# Crisp-set data from Krook (2010) on representation of women in western-democratic parliaments
# ------------------------------------------------------------------------------------------
# This example shows that CNA can distinguish exogenous and endogenous factors in the data.
# Without being told which factor is the outcome, CNA reproduces the original QCA
# of Krook (2010).
ana1 <- cna(d.women, details = c("e", "f"))
ana1
# The two resulting asf only reach an exhaustiveness score of 0.438, meaning that
# not all configurations that are compatible with the asf are contained in the data
# "d.women". Here is how to extract the configurations that are compatible with
# the first asf but are not contained in "d.women".
library(dplyr)
setdiff(ct2df(selectCases(asf(ana1)$condition[1], full.ct(d.women))),
d.women)
# Highly ambiguous crisp-set data from Wollebaek (2010) on very high volatility of
# grassroots associations in Norway
# --------------------------------------------------------------------------------
# csCNA with ordering from Wollebaek (2010) [Beware: due to massive ambiguities, this analysis
# will take about 20 seconds to compute.]
cna(d.volatile, ordering = "VO2", maxstep = c(6, 6, 16))
# Using suff.only, CNA can be forced to abandon the analysis after minimization of sufficient
# conditions. [This analysis terminates quickly.]
cna(d.volatile, ordering = "VO2", maxstep = c(6, 6, 16), suff.only = TRUE)
# Similarly, by using the default maxstep, CNA can be forced to only search for asf and csf
# with reduced complexity.
cna(d.volatile, ordering = "VO2")
# The verbose argument returns information during the execution of cna().
cna(d.volatile, ordering = "VO2", verbose = TRUE)
# Multi-value data from Hartmann & Kemmerzell (2010) on party bans in Africa
# ---------------------------------------------------------------------------
# mvCNA with an outcome specification taken from Hartmann & Kemmerzell
# (2010); coverage cutoff at 0.95 (consistency cutoff at 1), maxstep at c(6, 6, 10).
cna.pban <- cna(d.pban, outcome = "PB=1", cov = .95, maxstep = c(6, 6, 10),
what = "all")
cna.pban
# The previous function call yields a total of 14 asf and csf, only 5 of which are
# printed in the default output. Here is how to extract all 14 asf and csf.
asf(cna.pban)
csf(cna.pban)
# [Note that all of these 14 causal models reach better consistency and
# coverage scores than the one model Hartmann & Kemmerzell (2010) present in their paper,
# which they generated using the TOSMANA software, version 1.3.
# T=0 + T=1 + C=2 + T=1*V=0 + T=2*V=0 <-> PB=1]
condTbl("T=0 + T=1 + C=2 + T=1*V=0 + T=2*V=0 <-> PB = 1", d.pban)
# Extract all minimally sufficient conditions.
msc(cna.pban)
# Alternatively, all msc, asf, and csf can be recovered by means of the nsolutions
# argument of the print function.
print(cna.pban, nsolutions = "all")
# Print the configuration table with the "cases" column.
print(cna.pban, what = "t", show.cases = TRUE)
# Build solution formulas with maximally 4 disjuncts.
cna(d.pban, outcome = "PB=1", cov = .95, maxstep = c(4, 4, 10))
# Only print 2 digits of consistency and coverage scores.
print(cna.pban, digits = 2)
# Build all but print only two msc for each factor and two asf and csf.
print(cna(d.pban, outcome = "PB=1", cov = .95,
maxstep = c(6, 6, 10), what = "all"), nsolutions = 2)
# Lowering the consistency instead of the coverage threshold yields further models with
# excellent fit scores; print only asf.
cna(d.pban, outcome = "PB=1", con = .93, what = "a", maxstep = c(6, 6, 10))
# Specifying an outcome is unnecessary for d.pban. PB=1 is the only
# factor value in those data that could possibly be an outcome.
cna(d.pban, cov = .95, maxstep = c(6, 6, 10))
# Fuzzy-set data from Basurto (2013) on autonomy of biodiversity institutions in Costa Rica
# ---------------------------------------------------------------------------------------
# Basurto investigates two outcomes: emergence of local autonomy and endurance thereof. The
# data for the first outcome are contained in rows 1-14 of d.autonomy, the data for the second
# outcome in rows 15-30. For each outcome, the author distinguishes between local ("EM",
# "SP", "CO"), national ("CI", "PO") and international ("RE", "CN", "DE") conditions. Here,
# we first apply fsCNA to replicate the analysis for the local conditions of the endurance of
# local autonomy.
dat1 <- d.autonomy[15:30, c("AU","EM","SP","CO")]
cna(dat1, ordering = "AU", strict = TRUE, con = .9, cov = .9)
# The fsCNA model has significantly better consistency (and equal coverage) scores than the
# model presented by Basurto (p. 580): SP*EM + CO <-> AU, which he generated using the
# fs/QCA software.
condition("SP*EM + CO <-> AU", dat1) # both EM and CO are redundant to account for AU
# If we allow for dependencies among the conditions by setting strict = FALSE, CNA reveals
# that SP is a common cause of both AU and EM.
cna(dat1, ordering = "AU", strict = FALSE, con = .9, cov = .9)
# Here is the analysis for the international conditions of autonomy endurance, which
# yields the same model as the one presented by Basurto (plus one model Basurto does not mention).
dat2 <- d.autonomy[15:30, c("AU","RE", "CN", "DE")]
cna(dat2, ordering = "AU", con = .9, con.msc = .85, cov = .85)
# But there are other models (here printed with all solution attributes)
# that fare equally well.
cna(dat2, ordering = "AU", con = .85, cov = .9, details = TRUE)
# Finally, here is an analysis of the whole dataset, showing that across the whole period
# 1986-2006, the best causal model of local autonomy (AU) renders that outcome dependent
# only on local direct spending (SP).
cna(d.autonomy, outcome = "AU", con = .85, cov = .9,
maxstep = c(5, 5, 11), details = TRUE)
# Also build non-INUS solutions.
asf(cna(d.autonomy, outcome = "AU", con = .85, cov = .9,
maxstep = c(5, 5, 11), details = TRUE, inus.only = FALSE))
# High-dimensional data
# ---------------------
# As of package version 3.1, cna's handling of data with more than 20 factors
# has been improved. Here's an analysis of the data d.highdim with 50 factors, massive
# fragmentation, and 20% noise. (Takes about 15 seconds to compute.)
head(d.highdim)
cna(d.highdim, outcome = c("V13", "V11"), con = .8, cov = .8)
# By lowering maxstep, computation time can be reduced to less than 1 second
# (at the cost of an incomplete solution).
cna(d.highdim, outcome = c("V13", "V11"), con = .8, cov = .8,
maxstep = c(2,3,10))
# Highly ambiguous artificial data to illustrate exhaustiveness and acyclic.only
# ------------------------------------------------------------------------------
mycond <- "(D + C*f <-> A)*(C*d + c*D <-> B)*(B*d + D*f <-> C)*(c*B + B*f <-> E)"
dat1 <- selectCases(mycond)
ana1 <- cna(dat1, details = c("e","cy"))
# There exist almost 2M csf. This is how to build the first 1076 of them, with
# additional messages about the csf building process.
first.csf <- csf(ana1, verbose = TRUE)
# Most of these csf are compatible with more configurations than are contained in
# dat1. Only 193 csf in first.csf are perfectly exhaustive (i.e. all compatible
# configurations are contained in dat1).
subset(first.csf, exhaustiveness == 1)
# 1020 of the csf in first.csf contain cyclic substructures.
subset(first.csf, cyclic == TRUE)
# Here's how to only build acyclic csf.
ana2 <- cna(dat1, details = c("e","cy"), acyclic.only = TRUE)
csf(ana2, verbose = TRUE)
# Illustration of only.minimal.msc = FALSE
# ----------------------------------------
# Simulate noisy data on the causal structure "a*B*d + A*c*D <-> E"
set.seed(1324557857)
mydata <- allCombs(rep(2, 5)) - 1
dat1 <- makeFuzzy(mydata, fuzzvalues = seq(0, 0.5, 0.01))
dat1 <- ct2df(selectCases1("a*B*d + A*c*D <-> E", con = .8, cov = .8, dat1))
# In dat1, "a*B*d + A*c*D <-> E" has the following con and cov scores.
as.condTbl(condition("a*B*d + A*c*D <-> E", dat1))
# The standard algorithm of CNA will, however, not find this structure with
# con = cov = 0.8 because one of the disjuncts (a*B*d) does not meet the con
# threshold.
as.condTbl(condition(c("a*B*d <-> E", "A*c*D <-> E"), dat1))
cna(dat1, outcome = "E", con = .8, cov = .8)
# With the argument con.msc we can lower the con threshold for msc, but this does not
# recover "a*B*d + A*c*D <-> E" either.
cna2 <- cna(dat1, outcome = "E", con = .8, cov = .8, con.msc = .78)
cna2
msc(cna2)
# The reason is that "A*c -> E" and "c*D -> E" now also meet the con.msc threshold and,
# therefore, "A*c*D -> E" is not contained in the msc---because of violated minimality.
# In a situation like this, lifting the minimality requirement via
# only.minimal.msc = FALSE allows CNA to find the intended target.
cna(dat1, outcome = "E", con = .8, cov = .8, con.msc = .78,
only.minimal.msc = FALSE)
# Overriding automatic detection of the data type
# ------------------------------------------------
# The type argument allows for manually setting the data type.
# If "cs" data are treated as "mv" data, cna() automatically builds models for all values
# of outcome factors, i.e. both positive and negated outcomes.
cna(d.educate, type = "mv")
# Treating "cs" data as "fs".
cna(d.women, type = "fs")
# Not all manual settings are admissible.
try(cna(d.autonomy, outcome = "AU", con = .8, cov = .8, type = "mv" ))
# Shortcut functions from previous versions of the package continue to work
# (see ?shortcuts).
fscna(d.autonomy, outcome = "AU", con = .8, cov = .8)
mvcna(d.pban, outcome = "PB=1", con = .8)
# Illustration of asf.selection
# -----------------------------
# Consider the following data set:
d1 <- data.frame(X1 = c(1, 0, 1),
X2 = c(0, 1, 0),
Y = c(1, 1, 0))
ct1 <- configTable(d1, frequency = c(10, 10, 1))
# Both of the following are asfs reaching con=0.95 and cov=1.
condition(c("X1+X2<->Y", "x1+x2<->Y"), ct1)
# Up to version 3.4.0 of the cna package, these two asfs were inferred from
# ct1 by cna(). But the outcome Y is constant in ct1, except for a variation in
# the third row, which is incompatible with X1+X2<->Y and x1+x2<->Y. Subject to
# both of these models, the third row of ct1 is a noisy configuration. Inferring
# difference-making models that are incapable of accounting for the only difference
# in the outcome in the data is inadequate. (Thanks to Luna De Souter for
# pointing out this problem.) Hence, as of version 3.5.0, asfs whose outcome only
# varies in configurations incompatible with the strict crisp-set necessity
# or sufficiency relations expressed by those asfs are not returned anymore.
cna(ct1, outcome = "Y", con = 0.9)
# The old behavior of cna() can be obtained by setting the argument asf.selection
# to its non-default value "none".
cna(ct1, outcome = "Y", con = 0.9, asf.selection = "none")
# Analysis of fuzzy-set data from Aleman (2009).
cna(d.pacts, con = .9, cov = .85)
cna(d.pacts, con = .9, cov = .85, asf.selection = "none")
# In the default setting, cna() does not return any model for d.pacts because
# the outcome takes a value >0.5 in every single case, meaning it does not change
# between presence and absence. No difference-making model should be inferred from
# such data.
# The implications of asf.selection can also be traced by
# the verbose argument:
cna(d.pacts, con = .9, cov = .85, verbose = TRUE)
Run the code above in your browser using DataLab