# NOT RUN {
# Simulate a data set (one categorical target, ten numeric predictors and one
# categorical predictor) and pass it to ProblematicRecords().

# Correlation, on the normal scale, between the target's latent draw (x1) and
# the score that every predictor is derived from.
Correl <- 0.85
# Number of simulated rows.
N <- 10000

# Target starts as a uniform draw; it is replaced by a five-level factor below.
data <- data.table::data.table(Target = runif(N))
data[, x1 := qnorm(Target)]
data[, x2 := runif(N)]

# Shared score: mix x1 with independent normal noise so the result is standard
# normal, then map it back to (0, 1) with pnorm(). Computed once here instead
# of being repeated inside each predictor definition.
data[, Score := pnorm(Correl * x1 + sqrt(1 - Correl^2) * qnorm(x2))]

# Ten monotone transformations of the same score. Independent_Variable2 is the
# untransformed score itself.
data[, `:=`(
  Independent_Variable1 = log(Score),
  Independent_Variable2 = Score,
  Independent_Variable3 = exp(Score),
  Independent_Variable4 = exp(exp(Score)),
  Independent_Variable5 = sqrt(Score),
  Independent_Variable6 = Score^0.10,
  Independent_Variable7 = Score^0.25,
  Independent_Variable8 = Score^0.75,
  Independent_Variable9 = Score^2,
  Independent_Variable10 = Score^4
)]

# Overwrite Target with a five-level factor obtained by binning the score.
# right = FALSE makes each bin [lower, upper), i.e. a value belongs to the
# first bin whose upper break it is strictly below.
data[, Target := cut(
  Independent_Variable2,
  breaks = c(-Inf, 0.20, 0.40, 0.6, 0.8, Inf),
  labels = LETTERS[1:5],
  right = FALSE
)]

# Categorical predictor: same score, binned at slightly different cut points
# than Target.
data[, Independent_Variable11 := cut(
  Independent_Variable2,
  breaks = c(-Inf, 0.15, 0.45, 0.65, 0.85, Inf),
  labels = LETTERS[1:5],
  right = FALSE
)]

# Drop the helper columns so only Target and the eleven predictors remain.
data[, `:=`(x1 = NULL, x2 = NULL, Score = NULL)]

# NOTE(review): ProblematicRecords() is defined outside this file; the
# argument meanings below are inferred from their names -- confirm against the
# package documentation.
#   ColumnNumbers = NULL : presumably "use all columns"
#   Threshold            : presumably the score cutoff for flagging a record
#   MaxMem / NThreads    : presumably backend memory and thread settings;
#                          "28G" may exceed the RAM of a smaller machine
Outliers <- ProblematicRecords(
  data,
  ColumnNumbers = NULL,
  Threshold = 0.95,
  MaxMem = "28G",
  NThreads = -1
)
# }
# Run the code above in your browser using DataLab