# Attach the packages this script relies on before any output is produced:
# - rpart supplies the stagec prostate cancer data set
# - FRESA.CAD supplies cancerVarNames and the modelling functions used below
#   (without it the script stops at the first FRESA.CAD call)
library(rpart)
library(FRESA.CAD)
# Start the graphics device driver so that all plots are saved in one PDF file;
# the device stays open until dev.off() is called at the end of the script
pdf(file = "Example.pdf")
# Get the stage C prostate cancer data
data(stagec)
# Build the analysis table:
# - columns 1-3 and 5-6 are carried over unchanged
# - column 7 (the Gleason score) becomes one 0/1 indicator per score,
#   with scores of 9 and above pooled into a single indicator
# - column 4 becomes a 0/1 indicator that is 1 where the value equals 2
# - column 8 (the ploidy label) becomes three 0/1 indicators
dataCancer <- cbind(
  stagec[, c(1:3, 5:6)],
  gleason4 = as.numeric(stagec[, 7] == 4),
  gleason5 = as.numeric(stagec[, 7] == 5),
  gleason6 = as.numeric(stagec[, 7] == 6),
  gleason7 = as.numeric(stagec[, 7] == 7),
  gleason8 = as.numeric(stagec[, 7] == 8),
  gleason910 = as.numeric(stagec[, 7] >= 9),
  eet = as.numeric(stagec[, 4] == 2),
  diploid = as.numeric(stagec[, 8] == "diploid"),
  tetraploid = as.numeric(stagec[, 8] == "tetraploid"),
  notAneuploid = 1 - as.numeric(stagec[, 8] == "aneuploid")
)
# Keep only the rows that have no missing values
dataCancer <- dataCancer[complete.cases(dataCancer), ]
# Load cancerVarNames, a pre-established data frame with the names and
# descriptions of all variables; its second column is passed as the
# variable descriptions in each of the fits below
data(cancerVarNames)
# Fit a Cox proportional hazards model:
# - every other parameter is left at its default
md <- FRESA.Model(
  formula = Surv(pgtime, pgstat) ~ 1,
  data = dataCancer,
  var.description = cancerVarNames[, 2]
)
# Fit a logistic regression model:
# - every other parameter is left at its default
# - md is reused, so the previous fit is replaced
md <- FRESA.Model(
  formula = pgstat ~ 1,
  data = dataCancer,
  var.description = cancerVarNames[, 2]
)
# Fit a logistic regression model:
# - residual-based optimization
# - md is reused again, so only this last fit is kept
md <- FRESA.Model(
  formula = pgstat ~ 1,
  data = dataCancer,
  var.description = cancerVarNames[, 2],
  OptType = "Residual"
)
# Univariate ranking of the candidate variables:
# - the raw (uncategorized) data is analyzed
# - a Cox model is used for each variable
# - variables are ordered by the zIDI
rankedDataCancer <- univariateRankVariables(
  data = dataCancer,
  variableList = cancerVarNames,
  description = "Description",
  Outcome = "pgstat",
  formula = "Surv(pgtime, pgstat) ~ 1",
  type = "COX",
  categorizationType = "Raw",
  rankingTest = "zIDI"
)
# Forward selection of a Cox proportional hazards model:
# - candidates come from the ranked variable list
# - age is always included as a covariate
# - the zIDI is the feature inclusion criterion
# - 10 bootstrap loops
cancerModel <- ForwardSelection.Model.Bin(
  data = dataCancer,
  variableList = rankedDataCancer,
  Outcome = "pgstat",
  timeOutcome = "pgtime",
  type = "COX",
  covariates = "1 + age",
  selectionType = "zIDI",
  loops = 10
)
# Update the Cox model, using the ranked variables returned by the
# forward selection as the variable frequency table
uCancerModel <- updateModel.Bin(
  data = dataCancer,
  variableList = rankedDataCancer,
  VarFrequencyTable = cancerModel$ranked.var,
  Outcome = "pgstat",
  timeOutcome = "pgtime",
  type = "COX"
)
# Backward elimination on the final model of the update step:
# - non-significant variables are removed
# - the zIDI is the feature removal criterion
reducedCancerModel <- backVarElimination_Bin(
  object = uCancerModel$final.model,
  data = dataCancer,
  Outcome = "pgstat",
  type = "COX",
  selectionType = "zIDI"
)
# Bootstrap validation of the reduced model:
# - the formula left after backward elimination is validated
# - 50 bootstrap loops
bootCancerModel <- bootstrapValidation_Bin(
  data = dataCancer,
  model.formula = reducedCancerModel$back.formula,
  Outcome = "pgstat",
  type = "COX",
  loops = 50
)
# Summarize the bootstrapped model
sumBootCancerModel <- summary.bootstrapValidation_Bin(object = bootCancerModel)
# Plot the bootstrap results (drawn on the currently open graphics device)
plot(bootCancerModel)
# Scale the prostate cancer data with scale() at its defaults and convert
# the result back to a data frame
dataCancerScale <- as.data.frame(scale(dataCancer))
# Heat map of the scaled data:
# - all the variables in the ranked list
hmAll <- heatMaps(
  data = dataCancerScale,
  variableList = rankedDataCancer,
  Outcome = "pgstat",
  outcomeGain = 10
)
# Heat map of the scaled data:
# - restricted to the top ranked variables from the forward selection
hmTop <- heatMaps(
  data = dataCancerScale,
  variableList = rankedDataCancer,
  varRank = cancerModel$ranked.var,
  Outcome = "pgstat",
  outcomeGain = 10
)
# Cross-validated feature selection of a new Cox proportional hazards model:
# - the top 5 ranked variables (size = 5)
# - no bootstrapping (loops = 1)
# - age as a covariate
# - the zIDI as the feature inclusion criterion
# - a train fraction of 0.8
# - a 2-fold cross-validation in the feature selection and update procedures
#   (trainRepetition = 2)
# - a 10-fold cross-validation in the model validation procedure
# - an elimination p-value of 0.1
cancerModelCV <- crossValidationFeatureSelection_Bin(
  data = dataCancer,
  variableList = rankedDataCancer,
  Outcome = "pgstat",
  timeOutcome = "pgtime",
  type = "COX",
  covariates = "1 + age",
  selectionType = "zIDI",
  size = 5,
  loops = 1,
  trainFraction = 0.8,
  trainRepetition = 2,
  CVfolds = 10,
  elimination.pValue = 0.1
)
# Show the list of Cox model formulas from the cross-validation
cancerModelCV$formula.list
# Close the graphics device so the PDF file is finalized
dev.off()
# Run the code above in your browser using DataLab