# ---------------------------------------------------------
#
# Example 1:
# Run RGM based on individual-level data with Threshold prior
# based on the model Y = AY + BX + CU + E
#
# Data Generation
set.seed(9154)
# Number of data points
n = 10000
# Number of response variables, instrument variables, and covariates
p = 3
k = 4
l = 2
# Initialize causal interaction matrix between response variables
A = matrix(sample(c(-0.1, 0.1), p^2, replace = TRUE), p, p)
# Diagonal entries of A matrix will always be 0
diag(A) = 0
# Make the network sparse
A[sample(which(A != 0), length(which(A != 0)) / 2)] = 0
# Create D matrix (Indicator matrix: rows = responses, cols = instruments)
D = matrix(0, nrow = p, ncol = k)
# Manually assign values to D matrix
D[1, 1:2] = 1 # First response variable is influenced by the first 2 instruments
D[2, 3] = 1 # Second response variable is influenced by the 3rd instrument
D[3, 4] = 1 # Third response variable is influenced by the 4th instrument
# Initialize B matrix (effects of instruments on responses) based on D
B = matrix(0, p, k)
for (i in 1:p) {
for (j in 1:k) {
if (D[i, j] == 1) {
B[i, j] = 1
}
}
}
# Generate covariate data matrix U (n x l)
U = matrix(rnorm(n * l, 0, 1), nrow = n, ncol = l)
# Initialize C matrix (effects of covariates on responses), similar to B
C = matrix(0, p, l)
C[sample(length(C), size = ceiling(length(C) / 2))] = 1 # simple sparse pattern
# Define Sigma matrix (diagonal error variance)
Sigma = diag(p)
# Compute Mult_Mat
Mult_Mat = solve(diag(p) - A)
# Calculate Variance
Variance = Mult_Mat %*% Sigma %*% t(Mult_Mat)
# Generate instrument data matrix X (n x k)
X = matrix(rnorm(n * k, 0, 1), nrow = n, ncol = k)
# Initialize response data matrix Y (n x p)
Y = matrix(0, nrow = n, ncol = p)
# Generate response data matrix based on the model
for (i in 1:n) {
mu_i = Mult_Mat %*% (B %*% X[i, ] + C %*% U[i, ])
Y[i, ] = MASS::mvrnorm(n = 1, mu = mu_i, Sigma = Variance)
}
# Define a function to create smaller arrowheads
smaller_arrowheads = function(graph) {
igraph::E(graph)$arrow.size = 1
return(graph)
}
# Print true causal interaction matrices
A
B
C
# Plot the true graph structure between response variables
plot(smaller_arrowheads(igraph::graph_from_adjacency_matrix((A != 0) * 1,
mode = "directed")), layout = igraph::layout_in_circle, main = "True Graph")
# Apply RGM on individual-level data with covariates for Threshold prior
Output = RGM(X = X, Y = Y, U = U, D = D,
prior = "Threshold", SigmaStarModel = "diagonal")
# Get the graph structure between response variables
Output$zAEst
# Get the estimated causal strength matrix between response variables
Output$AEst
# Get the graph structure between response and instrument variables
Output$zBEst
# Get the estimated causal strength matrix between response and instrument variables
Output$BEst
# (Only when covariates are provided) Get the estimated covariate effects
Output$CEst
# Get the estimated causal network between responses
Output$Graph
# Plot the estimated causal network
plot(Output$Graph, main = "Estimated Causal Network")
# Plot posterior log-likelihood
plot(Output$LLPst, type = "l", xlab = "Number of Iterations", ylab = "Log-likelihood")
# -----------------------------------------------------------------
# Example 2:
# Run RGM based on summary-level data with Spike and Slab prior
# based on the model Y = AY + BX + CU + E
# Data Generation
set.seed(9154)
# Number of data points
n = 10000
# Number of response variables, instrument variables, and covariates
p = 3
k = 4
l = 2
# Initialize causal interaction matrix between response variables
A = matrix(sample(c(-0.1, 0.1), p^2, replace = TRUE), p, p)
# Diagonal entries of A matrix will always be 0
diag(A) = 0
# Make the network sparse
A[sample(which(A != 0), length(which(A != 0)) / 2)] = 0
# Create D matrix (Indicator matrix where each row corresponds to a response variable
# and each column corresponds to an instrument variable)
D = matrix(0, nrow = p, ncol = k)
# Manually assign values to D matrix
D[1, 1:2] = 1 # First response variable is influenced by the first 2 instruments
D[2, 3] = 1 # Second response variable is influenced by the 3rd instrument
D[3, 4] = 1 # Third response variable is influenced by the 4th instrument
# Initialize B matrix
B = matrix(0, p, k)
for (i in 1:p) {
for (j in 1:k) {
if (D[i, j] == 1) {
B[i, j] = 1
}
}
}
# Generate covariate data matrix U (n x l)
U = matrix(rnorm(n * l, 0, 1), nrow = n, ncol = l)
# Initialize C matrix (effects of covariates on responses)
C = matrix(0, p, l)
C[sample(length(C), size = ceiling(length(C) / 2))] = 1 # simple sparse pattern
# Use a full (non-diagonal) Sigma matrix
Sigma = crossprod(matrix(rnorm(p * p), p, p)) # SPD matrix
Mult_Mat = solve(diag(p) - A)
Variance = Mult_Mat %*% Sigma %*% t(Mult_Mat)
# Generate instrument data matrix
X = matrix(rnorm(n * k, 0, 1), nrow = n, ncol = k)
# Initialize response data matrix
Y = matrix(0, nrow = n, ncol = p)
# Generate response data matrix based on instrument + covariate effects
for (i in 1:n) {
mu_i = Mult_Mat %*% (B %*% X[i, ] + C %*% U[i, ])
Y[i, ] = MASS::mvrnorm(n = 1, mu = mu_i, Sigma = Variance)
}
# Calculate summary-level data (including covariate summaries)
Syy = t(Y) %*% Y / n
Syx = t(Y) %*% X / n
Sxx = t(X) %*% X / n
Syu = t(Y) %*% U / n
Sxu = t(X) %*% U / n
Suu = t(U) %*% U / n
# Print true causal interaction matrices
A
B
C
# Plot the true graph structure between response variables
plot(smaller_arrowheads(igraph::graph_from_adjacency_matrix(((A != 0) * 1),
mode = "directed")), layout = igraph::layout_in_circle, main = "True Graph")
# Apply RGM on summary-level data for Spike and Slab Prior + SSSL SigmaStarModel
Output = RGM(Syy = Syy, Syx = Syx, Sxx = Sxx,
Syu = Syu, Sxu = Sxu, Suu = Suu,
D = D, n = n, prior = "Spike and Slab",
SigmaStarModel = "SSSL")
# Get the graph structure between response variables
Output$zAEst
# Get the estimated causal strength matrix between response variables
Output$AEst
# Get the graph structure between response and instrument variables
Output$zBEst
# Get the estimated causal strength matrix between response and instrument variables
Output$BEst
# Get the estimated covariate effect matrix
Output$CEst
# (Only when SigmaStarModel = "SSSL") inclusion probabilities for Sigma*
Output$ZEst
# Get the estimated causal network between responses
Output$Graph
# Plot the estimated causal network
plot(Output$Graph, main = "Estimated Causal Network")
# Plot posterior log-likelihood
plot(Output$LLPst, type = "l", xlab = "Number of Iterations", ylab = "Log-likelihood")
# -----------------------------------------------------------------
# Example 3:
# Run RGM based on Sxx, Beta and SigmaHat with Spike and Slab prior
# based on the model Y = AY + BX + E
# Data Generation
set.seed(9154)
# Number of datapoints
n = 10000
# Number of response variables and number of instrument variables
p = 3
k = 4
# Initialize causal interaction matrix between response variables
A = matrix(sample(c(-0.1, 0.1), p^2, replace = TRUE), p, p)
# Diagonal entries of A matrix will always be 0
diag(A) = 0
# Make the network sparse
A[sample(which(A != 0), length(which(A != 0)) / 2)] = 0
# Create D matrix (Indicator matrix where each row corresponds to a response variable
# and each column corresponds to an instrument variable)
D = matrix(0, nrow = p, ncol = k)
# Manually assign values to D matrix
D[1, 1:2] = 1 # First response variable is influenced by the first 2 instruments
D[2, 3] = 1 # Second response variable is influenced by the 3rd instrument
D[3, 4] = 1 # Third response variable is influenced by the 4th instrument
# Initialize B matrix
B = matrix(0, p, k) # Initialize B matrix with zeros
# Calculate B matrix based on D matrix
for (i in 1:p) {
for (j in 1:k) {
if (D[i, j] == 1) {
B[i, j] = 1 # Set B[i, j] to 1 if D[i, j] is 1
}
}
}
# --- Full (non-diagonal) SPD error covariance matrix Sigma ---
R = matrix(rnorm(p * p), p, p)
Sigma = crossprod(R) # SPD
Sigma = Sigma / mean(diag(Sigma)) # scale so diagonal magnitudes are ~1
# Compute Mult_Mat
Mult_Mat = solve(diag(p) - A)
# Calculate Variance of Y given X
Variance = Mult_Mat %*% Sigma %*% t(Mult_Mat)
# Generate DNA expressions
X = matrix(rnorm(n * k, 0, 1), nrow = n, ncol = k)
# Initialize response data matrix
Y = matrix(0, nrow = n, ncol = p)
# Generate response data matrix based on instrument data matrix
for (i in 1:n) {
Y[i, ] = MASS::mvrnorm(n = 1, Mult_Mat %*% B %*% X[i, ], Variance)
}
# Centralize Data (required for Beta/SigmaHat input option)
Y = t(t(Y) - colMeans(Y))
X = t(t(X) - colMeans(X))
# Calculate Sxx
Sxx = t(X) %*% X / n
# Generate Beta matrix and SigmaHat
Beta = matrix(0, nrow = p, ncol = k)
SigmaHat = matrix(0, nrow = p, ncol = k)
for (i in 1:p) {
for (j in 1:k) {
fit = lm(Y[, i] ~ X[, j])
Beta[i, j] = fit$coefficients[2]
SigmaHat[i, j] = sum(fit$residuals^2) / n
}
}
# Print true causal interaction matrices between response variables
# and between response and instrument variables
A
B
# Plot the true graph structure between response variables
smaller_arrowheads = function(graph) {
igraph::E(graph)$arrow.size = 1
return(graph)
}
plot(smaller_arrowheads(
igraph::graph_from_adjacency_matrix(((A != 0) * 1), mode = "directed")
), layout = igraph::layout_in_circle, main = "True Graph")
# Apply RGM based on Sxx, Beta and SigmaHat for Spike and Slab Prior
# Use IW structure for SigmaStarModel
Output = RGM(Sxx = Sxx, Beta = Beta, SigmaHat = SigmaHat,
D = D, n = n, prior = "Spike and Slab",
SigmaStarModel = "IW")
# Get the graph structure between response variables
Output$zAEst
# Get the estimated causal strength matrix between response variables
Output$AEst
# Get the graph structure between response and instrument variables
Output$zBEst
# Get the estimated causal strength matrix between response and instrument variables
Output$BEst
# Get the estimated causal network between responses
Output$Graph
# Plot the estimated causal network
plot(Output$Graph, main = "Estimated Causal Network")
# Plot posterior log-likelihood
plot(Output$LLPst, type = 'l', xlab = "Number of Iterations", ylab = "Log-likelihood")
Run the code above in your browser using DataLab