Learn R Programming

Stochastic Process Model (SPM)

Features

Data simulation

  • Continuous (one- and multiple-dimensions)
  • Discrete (one- and multiple-dimensions)

Optimisation

  • Continuous (one- and multiple-dimensions)
  • Discrete (one- and multiple-dimensions)
  • Time-dependent coefficients (one-dimensional optimisation)

Data imputation (censored time-to-event data imputation)

How to install

Note: on Windows, please install Rtools first (https://cran.r-project.org/bin/windows/Rtools/). Note: compilation under Windows may fail if you run Windows on a virtual machine! Then run:

install.packages("devtools")
library(devtools)
install_github("izhbannikov/spm")

Examples

Test discrete simulation

library(stpm)
## Test discrete
data <- simdata_discr(N=1000)
pars <- spm_discrete(data)
pars

Test projection

library(stpm)
# Setting up the model
model.par <- list()
model.par$a <- matrix(c(-0.05, 1e-3, 2e-3, -0.05), nrow=2, ncol=2, byrow=TRUE)
model.par$f1 <- matrix(c(90, 35), nrow=1, ncol=2)
model.par$Q <- matrix(c(1e-8, 1e-9, 1e-9, 1e-8), nrow=2, ncol=2, byrow=TRUE)
model.par$f <- matrix(c(80, 27), nrow=1, ncol=2)
model.par$b <- matrix(c(6, 2), nrow=2, ncol=2)
model.par$mu0 <- 1e-5
model.par$theta <- 0.11
# Projection
# Discrete-time model
data.proj.discrete <- spm_projection(model.par, N=100, ystart=c(80, 27), tstart=c(30, 60))
plot(data.proj.discrete$stat$srv.prob, xlim = c(30, 115))
# Continuous-time model
data.proj.continuous <- spm_projection(model.par, N=100, ystart=c(80, 27), model="continuous", gomp=TRUE)
plot(data.proj.continuous$stat$srv.prob, xlim = c(30, 115))

Time-dependent model

library(stpm)
model.par <- list(at = "-0.05", f1t = "80", Qt = "2e-8", ft= "80", bt = "5", mu0t = "1e-5*exp(0.11*t)")
data.proj.time_dependent <- spm_projection(model.par, N=100, ystart=80, model="time-dependent")
plot(data.proj.time_dependent$stat$srv.prob, xlim = c(30,105))

Test prepare_data()

library(stpm)
data <- prepare_data(x=system.file("data","longdat.csv",package="stpm"))
head(data[[1]])
head(data[[2]])

Test sim_pobs()

library(stpm)
dat <- sim_pobs(N=500)
head(dat)

Test spm_pobs()

library(stpm)
#Reading the data:
data <- sim_pobs(N=100)
head(data)
#Parameters estimation:
pars <- spm_pobs(x=data)
pars

Test simdata_cont()

library(stpm)
dat <- simdata_cont(N=50)
head(dat)

Test simdata_discr()

library(stpm)
data <- simdata_discr(N=100)
head(data)

Test simdata_time_dep()

library(stpm)
dat <- simdata_time_dep(N=100)
head(dat)

Test spm_continuous()

library(stpm)
data <- simdata_cont(N=50)
pars <- spm_continuous(dat=data,a=-0.05, f1=80, Q=2e-8, f=80, b=5, mu0=2e-5)
pars

Test spm_discrete()

library(stpm)
data <- simdata_discr(N=10)
pars <- spm_discrete(data)
pars

Test spm()

library(stpm)
data.continuous <- simdata_cont(N=1000)
data.discrete <- simdata_discr(N=1000)
data <- list(data.continuous, data.discrete)
p.discr.model <- spm(data)
p.discr.model
p.cont.model <- spm(data, model="continuous")
p.cont.model
p.td.model <- spm(data, model="time-dependent",formulas=list(at="aa*t+bb", f1t="f1", Qt="Q", ft="f", bt="b", mu0t="mu0"), start=list(a=-0.001, bb=0.05, f1=80, Q=2e-8, f=80, b=5, mu0=1e-3))
p.td.model

Multiple imputation with spm.impute(...)

The SPM offers longitudinal data imputation with results that are better than those from other imputation tools, since it preserves the data structure, i.e. the relation between Y(t) and mu(Y(t),t). Below are two examples of multiple data imputation with the function spm.impute(...).

library(stpm)

#######################################################
############## One dimensional case ###################
#######################################################

# Data preparation (short format)#
data <- simdata_discr(N=1000, dt = 2, format="short")

miss.id <- sample(x=dim(data)[1], size=round(dim(data)[1]/4)) # ~25% missing data
incomplete.data <- data
incomplete.data[miss.id,4] <- NA
# End of data preparation #

##### Multiple imputation with SPM #####
imp.data <- spm.impute(x=incomplete.data, id=1, case="xi", t1=3, covariates="y1", minp=1, theta_range=seq(0.075, 0.09, by=0.001))$imputed

##### Look at the incomplete data with missings #####
head(incomplete.data)

##### Look at the imputed data #####
head(imp.data)


#########################################################
################ Two-dimensional case ###################
#########################################################

# Parameters for data simulation #
a <- matrix(c(-0.05, 0.01, 0.01, -0.05), nrow=2)
f1 <- matrix(c(90, 30), nrow=1, byrow=FALSE)
Q <- matrix(c(1e-7, 1e-8, 1e-8, 1e-7), nrow=2)
f0 <- matrix(c(80, 25), nrow=1, byrow=FALSE)
b <- matrix(c(5, 3), nrow=2, byrow=TRUE)
mu0 <- 1e-04
theta <- 0.07
ystart <- matrix(c(80, 25), nrow=2, byrow=TRUE)

# Data preparation #
data <- simdata_discr(N=1000, a=a, f1=f1, Q=Q, f=f0, b=b, ystart=ystart, mu0 = mu0, theta=theta, dt=2, format="short")

# Delete some observations in order to have approx. 25% missing data
incomplete.data <- data
miss.id <- sample(x=dim(data)[1], size=round(dim(data)[1]/4)) 
incomplete.data <- data
incomplete.data[miss.id,4] <- NA
miss.id <- sample(x=dim(data)[1], size=round(dim(data)[1]/4)) 
incomplete.data[miss.id,5] <- NA
# End of data preparation #

##### Multiple imputation with SPM #####
imp.data <- spm.impute(x=incomplete.data, id=1, case="xi", t1=3, covariates=c("y1", "y2"), minp=1, theta_range=seq(0.060, 0.07, by=0.001))$imputed

##### Look at the incomplete data with missings #####
head(incomplete.data)

##### Look at the imputed data #####
head(imp.data)

Copy Link

Version

Install

install.packages('stpm')

Monthly Downloads

202

Version

1.7.12

License

GPL

Maintainer

Ilya Y Zhbannikov

Last Published

September 5th, 2022

Functions in stpm (1.7.12)

prepare_data_discr

Prepares discrete-time dataset.
spm_cont_lin

Continuous multi-dimensional optimization with linear terms in mu only
simdata_time_dep

Simulation function for continuous trait with time-dependent coefficients.
spm_cont_quad_lin

Continuous multi-dimensional optimization with quadratic and linear terms
spm_con_1d

Fitting a 1-D SPM model with constant parameters
simdata_gamma_frailty

This script simulates data using familial frailty model. We use the following variation: gamma(mu, ssq), where mu is the mean and ssq is sigma square. See: https://www.rocscience.com/help/swedge/webhelp/swedge/Gamma_Distribution.htm
spm

A central function that estimates Stochastic Process Model parameters from a given dataset.
spm_con_1d_g

Fitting a 1-D genetic SPM model with constant parameters
simdata_discr

Multi-dimension simulation function
spm.impute

Multiple Data Imputation with SPM
simdata_cont

Multi-dimensional simulation function for continuous-time SPM.
spm_time_dep

A function for the model with time-dependent model parameters.
spm_continuous

Continuous multi-dimensional optimization
spm_projection

A data projection with previously estimated or user-defined parameters. Projections are constructed for a cohort with fixed or normally distributed initial covariates.
trim.trailing

Returns string w/o trailing whitespace
spm_discrete

Discrete multi-dimensional optimization
trim.leading

Returns string w/o leading whitespace
spm_pobs

Continuous-time multi-dimensional optimization for SPM with partially observed covariates (multidimensional GenSPM)
vitstat

Vital (mortality) statistics.
trim

Returns string w/o leading or trailing whitespace
stpm

Stochastic Process Model for Analysis of Longitudinal and Time-to-Event Outcomes
ex_data

This is the longitudinal genetic dataset.
getNextY.discr

An internal function to compute the next value of physiological variable Y based on discrete-time model (Akushevich et al., 2005)
getNextY.discr.m

An internal function to compute next m based on discrete-time model
getNextY.cont

An internal function to compute next Y based on continuous-time model (Yashin et al., 2007)
assign_to_global

function loading results in global environment
LRTest

Likelihood-ratio test
getNextY.cont2

An internal function to compute next value of physiological variable Y
getPrevY.discr

An internal function to compute previous value of physiological variable Y based on discrete-time model
sigma_sq

An internal function to compute sigma square analytically
getPrevY.discr.m

An internal function to compute previous m based on discrete-time model
func1

An internal function to compute m and gamma based on continuous-time model (Yashin et al., 2007)
longdat

This is the longitudinal dataset.
get.column.index

An internal function to obtain column index by its name
m

An internal function to compute m from
make.short.format

An internal function which constructs the short data format from a given long format
sim_pobs

Multi-dimension simulation function for data with partially observed covariates (multidimensional GenSPM) with arbitrary intervals
prepare_data_cont

Prepares continuous-time dataset.
mu

An internal function to compute mu
prepare_data

Data pre-processing for analysis with stochastic process model methodology.