# Create a random sequence
sequence <- create_sequence("DNA", len = 2000, seed = 1)
## Alternatively, we could obtain a sequence as follows:
## > data("lambda", package = "dsmmR")
## > sequence <- c(lambda)
states <- sort(unique(sequence))
degree <- 3
# ===========================================================================
# Nonparametric Estimation.
# Fitting a random sequence under distributions of unknown shape.
# ===========================================================================
# ---------------------------------------------------------------------------
# Both p and f are drifting - Model 1.
# ---------------------------------------------------------------------------
obj_model_1 <- fit_dsmm(sequence = sequence,
degree = degree,
f_is_drifting = TRUE,
p_is_drifting = TRUE,
states = states,
initial_dist = "freq",
estimation = "nonparametric", # default value
f_dist = NULL # default value
)
cat(paste0("We fitted a sequence with ", obj_model_1$Model, ",\n",
"model size: n = ", obj_model_1$model_size, ",\n",
"length of state space: s = ", obj_model_1$s, ",\n",
"maximum sojourn time: k_max = ", obj_model_1$k_max, " and\n",
"polynomial (drifting) Degree: d = ", obj_model_1$degree, ".\n"))
# Get the drifting p and f arrays.
p_drift <- obj_model_1$dist$p_drift
f_drift <- obj_model_1$dist$f_drift
cat(paste0("Dimension of p_drift: (s, s, d + 1) = (",
paste(dim(p_drift), collapse = ", "), ").\n",
"Dimension of f_drift: (s, s, k_max, d + 1) = (",
paste(dim(f_drift), collapse = ", "), ").\n"))
# We can even check the embedded Markov chain and the sojourn times
# directly from the returned object, if we wish to do so.
# This is achieved through the `base::rle()` function, used on `sequence`.
model_emc <- obj_model_1$emc
model_sojourn_times <- obj_model_1$soj_times
# ---------------------------------------------------------------------------
# Fitting the sequence when p is drifting and f is not drifting - Model 2.
# ---------------------------------------------------------------------------
obj_model_2 <- fit_dsmm(sequence = sequence,
degree = degree,
f_is_drifting = FALSE,
p_is_drifting = TRUE)
cat(paste0("We fitted a sequence with ", obj_model_2$Model, ".\n"))
# Get the drifting p and non-drifting f arrays.
p_drift_2 <- obj_model_2$dist$p_drift
f_notdrift <- obj_model_2$dist$f_notdrift
all.equal.numeric(p_drift, p_drift_2) # p is the same as in Model 1.
cat(paste0("Dimension of f_notdrift: (s, s, k_max) = (",
paste(dim(f_notdrift), collapse = ", "), ").\n"))
# ---------------------------------------------------------------------------
# Fitting the sequence when f is drifting and p is not drifting - Model 3.
# ---------------------------------------------------------------------------
obj_model_3 <- fit_dsmm(sequence = sequence,
degree = degree,
f_is_drifting = TRUE,
p_is_drifting = FALSE)
cat(paste0("We fitted a sequence with ", obj_model_3$Model, ".\n"))
# Get the drifting f and non-drifting p arrays.
p_notdrift <- obj_model_3$dist$p_notdrift
f_drift_3 <- obj_model_3$dist$f_drift
all.equal.numeric(f_drift, f_drift_3) # f is the same as in Model 1.
cat(paste0("Dimension of f_notdrift: (s, s) = (",
paste(dim(p_notdrift), collapse = ", "), ").\n"))
# ===========================================================================
# Parametric Estimation
# Fitting a random sequence under distributions of known shape.
# ===========================================================================
### Comments
### 1. For the parametric estimation it is recommended to use a common set
### of distributions while only the parameters (of the sojourn times)
### are drifting. This results in (generally) higher accuracy.
### 2. This process is similar to that used in `dsmm_parametric()`.
s <- length(states)
# Getting the distributions for the states.
# Rows correspond to previous state `u`.
# Columns correspond to next state `v`.
f_dist_1 <- matrix(c(NA, "unif", "dweibull", "nbinom",
"pois", NA, "pois", "dweibull",
"geom", "pois", NA, "geom",
"dweibull", 'geom', "pois", NA),
nrow = s, ncol = s, byrow = TRUE)
f_dist <- array(f_dist_1, dim = c(s, s, degree + 1))
dim(f_dist)
# ---------------------------------------------------------------------------
# Both p and f are drifting - Model 1.
# ---------------------------------------------------------------------------
obj_fit_parametric <- fit_dsmm(sequence = sequence,
degree = degree,
f_is_drifting = TRUE,
p_is_drifting = TRUE,
states = states,
initial_dist = 'unif',
estimation = 'parametric',
f_dist = f_dist)
cat("The class of `obj_fit_parametric` is : (",
paste0(class(obj_fit_parametric), collapse = ', '), ").\n")
# Estimated parameters.
f_params <- obj_fit_parametric$dist$f_drift_parameters
# The drifting sojourn time distribution parameters.
f_0 <- f_params[,,,1]
f_1.3 <- f_params[,,,2]
f_2.3 <- f_params[,,,3]
f_1 <- f_params[,,,4]
params <- paste0('q = ', round(f_params["c", "t", 1, ], 3),
', beta = ', round(f_params["c", "t", 2, ], 3))
f_names <- c("f_0", paste0("f_", 1:(degree-1), "/", degree), "f_1")
all_names <- paste(f_names, ":", params)
cat("The drifting of the parameters for passing from \n",
"`u` = 'c' to `v` = 't' under a discrete Weibull distribution is:",
"\n", all_names[1], "\n", all_names[2],
"\n", all_names[3], "\n", all_names[4])
# ---------------------------------------------------------------------------
# f is not drifting, only p is drifting - Model 2.
# ---------------------------------------------------------------------------
obj_fit_parametric_2 <- fit_dsmm(sequence = sequence,
degree = degree,
f_is_drifting = FALSE,
p_is_drifting = TRUE,
initial_dist = 'unif',
estimation = 'parametric',
f_dist = f_dist_1)
cat("The class of `obj_fit_parametric_2` is : (",
paste0(class(obj_fit_parametric_2), collapse = ', '), ").\n")
# Estimated parameters.
f_params_2 <- obj_fit_parametric_2$dist$f_notdrift_parameters
params_2 <- paste0('q = ', round(f_params_2["c", "t", 1], 3),
', beta = ', round(f_params_2["c", "t", 2], 3))
cat("Not-drifting parameters for passing from ",
"`u` = 'c' to `v` = 't' \n under a discrete Weibull distribution are:\n",
paste("f :", params_2))
# ===========================================================================
# `simulate()` and `get_kernel()` can be used for the two objects,
# `dsmm_fit_nonparametric` and `dsmm_fit_parametric`.
# ===========================================================================
sim_seq_nonparametric <- simulate(obj_model_1, nsim = 10)
str(sim_seq_nonparametric)
kernel_drift_parametric <- get_kernel(obj_fit_parametric, klim = 10)
str(kernel_drift_parametric)
Run the code above in your browser using DataLab