# NOT RUN {
# Simulate a small data set: 400 observations of 4 predictors plus a
# response that is a known linear combination of them with N(0, 1) noise.
# The seed is fixed so the draws are reproducible.
set.seed(23)
x1 <- rnorm(400, mean = 10, sd = 2)
x2 <- rnorm(400, mean = 5, sd = 3)
x3 <- rnorm(400, mean = -2, sd = 1)
x4 <- rnorm(400, mean = 0, sd = 5)
y <- 2.4 - 0.6 * x1 + 5.5 * x2 - 7.2 * x3 + 5.7 * x4 +
  rnorm(400)
# Collect predictors and response in a single data.frame.
data <- data.frame(x1, x2, x3, x4, y)
# Reference fit: ordinary linear model on the full, unsketched data.
lm(formula = y ~ ., data = data)
# RAD ("R") sketch, size controlled through epsilon = 0.2.
s1 <- sketch(data, epsilon = 0.2, method = 'R', affine = TRUE)
dim(s1)
# Fit on the sketch without lm's own intercept (`- 1`); the estimates
# should be very close to those of the reference fit.
lm(formula = y ~ . - 1, data = s1)
# Alternatively, request the number of sketched observations directly
# via "obs_sketch".
s2 <- sketch(data, obs_sketch = 200, method = 'R', affine = TRUE)
dim(s2)
# The sketch is smaller, so expect somewhat larger deviations.
lm(formula = y ~ . - 1, data = s2)
# SRHT ("S") sketch.
s3 <- sketch(data, epsilon = 0.2, method = 'S', affine = TRUE)
dim(s3)
lm(formula = y ~ . - 1, data = s3)
# CW ("C") sketch.
s4 <- sketch(data, epsilon = 0.2, method = 'C', affine = TRUE)
dim(s4)
# Here the sketch is smaller because the number of variables is very
# small; as the number of variables grows, CW sketches need many more
# observations than RAD/SRHT sketches.
lm(formula = y ~ . - 1, data = s4)
# Same simulated data, but with an explicit intercept column x0 = 1
# stored in the data.frame.
data2 <- data.frame(x0 = 1, x1, x2, x3, x4, y)
lm(formula = y ~ . - 1, data = data2)
# Counterpart of s1: the intercept column is already part of the data,
# so affine = FALSE is the adequate setting here.
s5 <- sketch(data2, epsilon = 0.2, method = 'R', affine = FALSE)
dim(s5)
lm(formula = y ~ . - 1, data = s5)
# }
# Run the code above in your browser using DataLab