## We will first begin by simulating data in 11 covariates and a continuous outcome
## with significant interaction terms and correlations amongst covariates (to simulate a
## non-randomized experiment with a strongly non-linear underlying model).
## First, we will create a matrix with the input variables. The input variables will all be
## categorical variables.
## Allocate a 1000 x 12 matrix (initially all NA). Columns 1-10 receive the
## covariates: uniform draws on [1, 3] rounded to the nearest integer, so each
## takes the values 1, 2 or 3. Column 11 is a 0/1 indicator drawn with
## probability 0.5. Column 12 is left as NA here.
m <- matrix(nrow = 1000, ncol = 12)
## One draw of 10 * 1000 uniforms fills columns 1-10 in column-major order,
## i.e. column by column, just as ten successive draws of 1000 would.
m[, 1:10] <- round(runif(10 * nrow(m), min = 1, max = 3))
m[, 11] <- rbinom(nrow(m), 1, 0.5)
## Next, we will simulate the output variable and include interaction terms
## Mean of the outcome for every row at once: main effects of all 11
## covariates plus two- and three-way interaction terms. The terms are summed
## in a fixed left-to-right order; the arithmetic is elementwise over the
## columns of m, so no explicit loop over rows is needed.
a <- (2 * m[, 11] + 0.5 * m[, 1] - 4 * m[, 2] + 2.3 * m[, 3] +
  0.8 * m[, 4] - 0.7 * m[, 5] - 4 * m[, 6] + 3.6 * m[, 7] +
  1.2 * m[, 8] - 11 * m[, 9] - 2.1 * m[, 10] + 2.3 * m[, 3] * m[, 4] -
  3.5 * m[, 5] * m[, 6] * m[, 7] + 8 * m[, 1] * m[, 2] * m[, 9] +
  2.1 * m[, 2] * m[, 6] * m[, 8] + 5 * m[, 4] * m[, 7] * m[, 9] +
  8 * m[, 3] * m[, 10] * m[, 6] + 11 * m[, 7] * m[, 8] * m[, 5] +
  8 * m[, 3] * m[, 9] * m[, 2])
## Column 12 is the outcome: one normal draw per row, centred on that row's
## mean with standard deviation 1. rnorm() is vectorized over `mean`, so the
## draws are generated in row order.
m[, 12] <- rnorm(nrow(m), mean = a, sd = 1)
## We are interested in determining the coefficient of covariate 11, which is 2.
## The most straightforward
## way of doing this is to use simple linear regression as follows:
## Convert the matrix to a data frame; since the matrix has no column names,
## the columns are named V1, ..., V12.
m <- as.data.frame(m)
## Regress the outcome (column 12) on covariates 1-11, main effects only.
k <- lm(m[, 12] ~ ., data = m[, 1:11])
## The value of the coefficient of variable 11 found by the regression can be
## retrieved with coef(). It is selected by name rather than by position, so
## the lookup does not depend on the intercept occupying the first slot, and
## coef() avoids relying on `$` partial matching of `coefficients`.
coef(k)["V11"]
## We can now use the stratacont() function to find a more accurate estimation of the coefficient
## NOTE(review): 11 and 12 are presumably the column indices of the covariate
## of interest and of the outcome in m -- confirm against the stratacont() docs.
g <- stratacont(11, 12, m)
## Run the code above in your browser using DataLab