Learn R Programming

robregcc (version 1.0)

robregcc_sim: Simulation data

Description

Simulate data for the robust regression with compositional covariates

Usage

robregcc_sim(n, betacc, O, Sigma, levg, snr, shft, m, C, out = list())

Arguments

n

sample size

betacc

model coefficient vector satisfying the compositional constraint (in the example below, C %*% betacc = 0)

O

number of outliers

Sigma

covariance matrix of simulated predictors

levg

indicator (1/0) of whether or not to include leveraged observations

snr

noise to signal ratio

shft

multiplicative factor applied to the model variance for creating outliers

m

test sample size

C

subcompositional matrix

out

list for obtaining output with simulated data structure

Value

a list containing simulated output.

References

Mishra, A., Mueller, C. (2019) Robust regression with compositional covariates. In preparation. arXiv:1909.04990.

Examples

Run this code
# NOT RUN {
 
## Simulation example:

library(robregcc)
library(magrittr)

## Simulation settings
## n: sample size
## p: number of predictors
## o: fraction of observations as outliers
## L: {0,1} => leveraged {no, yes}, indicator variable for outlier type
## shFac: multiplicative factor of true standard deviation by which the
##        outlying fraction `o` of the observations is shifted
## ngrp: number of subgroups in the model
## snr: noise to signal ratio for computing true standard deviation of error
## sp_beta: {1,0} => {sparse, non-sparse} model coefficients (see step 1)

p <- 80
n <- 300
o <- 0.10
L <- 1
shFac <- 6       # shFac = {6,8} corresponds to {moderate, high} outlier
ngrp <- 4
snr <- 3
sp_beta <- 1

# Set seed for reproducibility
example_seed <- 2 * p + 1
set.seed(example_seed)

## 1. coefficient and subcomposition matrix C
if (sp_beta == 1) {          ## sparse model coefficients
  # Subcomposition matrix C: row ii is the 0/1 indicator of group ii.
  # The groups cover columns 1-10, 11-16, 17-20 and 21-23; the remaining
  # columns of C stay zero.
  tind <- c(0, 10, 16, 20, 23)
  C1 <- matrix(0, ngrp, 23)
  for (ii in seq_len(ngrp)) {
    C1[ii, (tind[ii] + 1):tind[ii + 1]] <- 1
  }
  C <- matrix(0, ngrp, p)
  C[, seq_len(ncol(C1))] <- C1

  # model coefficient beta; Follow examples from [Pixu Shi 2016]
  beta <- c(1, -0.8, 0.4, 0, 0, -0.6, 0, 0, 0, 0, -1.5,
            0, 1.2, 0, 0, 0.3)
  beta <- c(beta, rep(0, p - length(beta)))   # pad with zeros to length p
  tcrossprod(C, t(beta))     ## sanity check: prints C %*% beta
} else if (sp_beta == 0) {   ## non-sparse model coefficients
  # Subcomposition matrix C: split the p columns into ngrp consecutive
  # groups; group ii ends at column round(ii * p / ngrp), capped at p.
  j <- 1
  C <- matrix(0, ngrp, p)
  for (ii in seq_len(ngrp)) {
    tv <- min(round(ii * p / ngrp), p)
    C[ii, j:tv] <- 1
    j <- tv + 1
  }

  # model coefficient beta: random signs with magnitudes in [0.3, 0.4] ...
  beta <- sample(c(1, -1), p, replace = TRUE) * runif(p, .3, .4)
  # ... then multiplied by (I - U U'), where U holds the left singular
  # vectors of t(C); the result is a p x 1 matrix.
  beta <- svd(t(C))$u %>% tcrossprod() %>%
    subtract(diag(p), .) %>%
    tcrossprod(., t(beta))
  tcrossprod(C, t(beta))     ## sanity check: prints C %*% beta
} else {
  # Fail early instead of leaving C and beta undefined for the next step.
  stop("sp_beta must be 1 (sparse) or 0 (non-sparse)", call. = FALSE)
}
# number of outliers: fraction o of the n observations
O <- o * n

## 2. simulate response and predictor matrix, i.e., X, y
# Covariance matrix of the predictors with entries 0.5^|i - j|
Sigma <- 0.5^abs(outer(seq_len(p), seq_len(p), "-"))

# Placeholder list handed to robregcc_sim() through `out`; the call
# returns the simulated data, which overwrites data.case.
data.case <- vector("list", 1)
data.case <- robregcc_sim(
  n = n, betacc = beta, O = O, Sigma = Sigma, levg = L,
  snr = snr, shft = shFac, m = 0, C = C, out = data.case
)

# We have saved a copy of simulated data in the package
# with name simulate_robregcc_sp and simulate_robregcc_nsp

# predictor matrix
X <- data.case$X
# model response
y <- data.case$y


# }

Run the code above in your browser using DataLab