
Last chance! 50% off unlimited learning
Sale ends in
Fit parametric regression models to the outcome distribution and, optionally,
parametric regression models for the joint distribution of the predictors
(structural equation models).
Then the function sim.synth
can be called on the resulting object
to simulate from the parametric model based on the machinery of the lava
package.
synthesize(object, data, ...)
# S3 method for formula
synthesize(
object,
data,
recursive = FALSE,
max.levels = 10,
verbose = FALSE,
...
)
# S3 method for lvm
synthesize(
object,
data,
max.levels = 10,
logtrans = NULL,
verbose = FALSE,
fix.names = FALSE,
...
)
lava object
object: Specification of the synthesizing model structures. Either a formula
or a lvm object. See examples.
data: Data to be synthesized.
...: Not used yet.
recursive: Let covariates recursively depend on each other.
max.levels: Integer used to guess which variables are categorical. When set to 10,
the default, variables with fewer than 10 unique values in data are treated as categorical.
verbose: Logical. If TRUE then more messages and warnings are provided.
logtrans: Vector of covariate names that should be log-transformed. This is primarily for internal use.
fix.names: Fix possible problematic covariate names.
Thomas A. Gerds <tag@biostat.ku.dk>
Synthesizes survival data (also works for linear models and generalized linear models).
The idea is to be able to simulate new data sets that mimic the original data.
See vignette("synthesize", package = "riskRegression")
for more details.
The simulation engine is: lava.
lvm
# Example 1: pbc data, synthesizing model specified as a formula
library(survival)
library(lava)
data(pbc)
# Keep only the variables used below and drop rows with missing values
pbc <- na.omit(pbc[, c("time", "status", "sex", "age", "bili")])
pbc$logbili <- log(pbc$bili)
# Build the synthesizing object from the formula, then simulate from it
v_synt <- synthesize(
  object = Surv(time, status) ~ sex + age + logbili,
  data = pbc
)
d <- simsynth(v_synt, 1000)
# Fit the same Cox model once on the simulated and once on the real data
fit_sim <- coxph(Surv(time, status == 1) ~ age + sex + logbili, data = d)
fit_real <- coxph(Surv(time, status == 1) ~ age + sex + logbili, data = pbc)
# Compare estimated log-hazard ratios between simulated and real data
cbind(coef(fit_sim), coef(fit_real))
# Example 2: pbc data, synthesizing model specified as a lava lvm object
u <- lvm()
# Distributions assigned to the covariates
# NOTE(review): `trt` is declared here, but `pbc` was subset earlier to
# time/status/sex/age/bili (plus logbili) -- confirm synthesize() tolerates a
# model variable that is absent from `data`.
distribution(u, ~sex) <- binomial.lvm()
distribution(u, ~age) <- normal.lvm()
distribution(u, ~trt) <- binomial.lvm()
distribution(u, ~logbili) <- normal.lvm()
# `time` is declared as the minimum of three latent times; the values
# 0/1/2 attached to them go with the "status" variable
u <- eventTime(
  u,
  time ~ min(time.cens = 0, time.transplant = 1, time.death = 2),
  "status"
)
# Regression structure between the covariates and the latent event times
lava::regression(u, logbili ~ age + sex) <- 1
lava::regression(u, time.transplant ~ sex + age + logbili) <- 1
lava::regression(u, time.death ~ sex + age + logbili) <- 1
lava::regression(u, time.cens ~ 1) <- 1
# logbili is defined as the log of bili
transform(u, logbili ~ bili) <- function(x) {
  log(x)
}
u_synt <- synthesize(object = u, data = na.omit(pbc))
# Fix the seed so the simulated data set is reproducible
set.seed(8)
d <- simsynth(u_synt, n = 1000)
# note: synthesize may relabel status variable
fit_sim <- coxph(Surv(time, status == 1) ~ age + sex + logbili, data = d)
fit_real <- coxph(Surv(time, status == 1) ~ age + sex + log(bili), data = pbc)
# Compare estimated log-hazard ratios between simulated and real data
cbind(coef(fit_sim), coef(fit_real))
#
# Example 3: cancer data (the `ovarian` data set is used below)
#
# presumably data(cancer) is what makes `ovarian` available -- verify
data(cancer)

# 3a) synthesizing model specified as a lava lvm object
b <- lvm()
distribution(b, ~rx) <- binomial.lvm()
distribution(b, ~age) <- normal.lvm()
distribution(b, ~resid.ds) <- binomial.lvm()
distribution(b, ~ecog.ps) <- binomial.lvm()
lava::regression(b, time.death ~ age + rx + resid.ds) <- 1
# `futime` is declared as the minimum of the two latent times; the values
# 0/1 attached to them go with the "fustat" variable
b <- eventTime(b, futime ~ min(time.cens = 0, time.death = 1), "fustat")
b_synt <- synthesize(object = b, data = ovarian)
D <- simsynth(b_synt, 1000)
fit_real <- coxph(Surv(futime, fustat) ~ age + rx + resid.ds, data = ovarian)
fit_sim <- coxph(Surv(futime, fustat) ~ age + rx + resid.ds, data = D)
cbind(coef(fit_sim), coef(fit_real))

# 3b) same data, synthesizing model specified as a formula
w_synt <- synthesize(
  object = Surv(futime, fustat) ~ age + rx + resid.ds,
  data = ovarian
)
D <- simsynth(w_synt, 1000)
fit_sim <- coxph(Surv(futime, fustat == 1) ~ age + rx + resid.ds, data = D)
fit_real <- coxph(
  Surv(futime, fustat == 1) ~ age + rx + resid.ds,
  data = ovarian
)
# Compare estimated log-hazard ratios between simulated and real data
cbind(coef(fit_sim), coef(fit_real))
Run the code above in your browser using DataLab