# NOT RUN {
 ##########################################
 ####### 1st example:
 ####### Simulate data with interventions
set.seed(1)
    ## sample size n
      n <- 4000
    ## 5 predictor variables
      p  <- 5
    ## simulate as independent Gaussian variables
      X <- matrix(rnorm(n*p),nrow=n)
    ## divide data into observational (ExpInd=1) and interventional (ExpInd=2)
      ExpInd <- c(rep(1,n/2),rep(2,n/2))
    ## for interventional data (ExpInd==2): change distribution
      X[ExpInd==2,] <- sweep(X[ExpInd==2,],2, 5*rnorm(p) ,FUN="*")
    ## first two variables are the causal predictors of Y
      beta <- c(1,1,rep(0,p-2))
    ## response variable Y
      Y <- as.numeric(X%*%beta + rnorm(n))
   ## optinal: make last variable a child of Y (so last variable is non-causal for Y)
      X[,p] <- 0.3*Y + rnorm(n)
 ####### Compute "Invariant Causal Prediction" Confidence Intervals
      icp <- ICP(X,Y,ExpInd)
 ###### Print/plot/show summary of output
      print(icp)
      plot(icp)
#### compare with linear model 
      cat("\n compare with linear model  \n")            
      print(summary(lm(Y~X)))
##########################################
 ####### 2nd example:
 ####### Simulate a DAG where X1 -> Y, Y -> X2 and Y -> X3
 ####### noise interventions on second half of data on X1
 ####### structure of DAG (at Y -> X2) is changing under interventions
     n1 <- 400
     n2 <- 500
     ExpInd <- c(rep(1,n1), rep(2,n2))
   ## index for observational (ExpInd=1) and intervention data (ExpInd=2)
     X1 <- c(rnorm(n1), 2 * rnorm(n2) + 1)
     Y <- 0.5 * X1 + 0.2 * rnorm(n1 + n2)
     X2 <- c(1.5 * Y[1:n1]      + 0.4 * rnorm(n1),
            -0.3 * Y[(n1+1):n2] + 0.4 * rnorm(n2))
     X3 <-  -0.4 * Y            + 0.2 * rnorm(n1 + n2)
     X <- cbind(X1, X2, X3)
 ### Compute "Invariant Causal Prediction" Confidence Intervals
  ## use a rank-based test to detect shift in distribution of residuals
     icp <- ICP(X, Y, ExpInd,test="ranks")
  ## use a Kolmogorov-Smirnov test to detect shift in distribution of residuals
     icp <- ICP(X, Y, ExpInd,test="ks")
  ## can also supply test as a function
  ## here chosen to be equivalent to option "ks" above
     icp <- ICP(X, Y, ExpInd,test=function(x,z) ks.test(x,z)$p.val)
 ## use a test based on normal approximations
     icp <- ICP(X, Y, ExpInd, test="normal")
 ### Print/plot/show summary of output
     print(icp)
     plot(icp)
#### compare with linear model 
     cat("\n compare with linear model \n")            
     print(summary(lm(Y~X)))
# }
# NOT RUN {
 ##########################################
 ####### 3rd example:
 ####### College Distance data 
     library(AER)
     data("CollegeDistance")
     CD <- CollegeDistance
  ##  define two experimental settings by
  ##  distance to closest 4-year college
     ExpInd <- list()
     ExpInd[[1]] <- which(CD$distance < quantile(CD$distance,0.5))
     ExpInd[[2]] <- which(CD$distance >= quantile(CD$distance,0.5))
  ## target variable is binary (did education lead at least to BA degree?)
     Y <- as.factor(CD$education>=16)
  ## use these predictors
     X <- CD[,c("gender","ethnicity","score","fcollege","mcollege","home",
        "urban","unemp","wage","tuition","income","region")]
  ## searching all subsets (use selection="lasso" or selection="stability"
  ##     to select a subset of subsets to search)
  ## with selection="all" the function will take several minutes
    icp <- ICP(X,Y,ExpInd,selection="all",alpha=0.1)
  ## Print/plot/show summary of output
     print(icp)
     summary(icp)
     plot(icp)
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab