library(survival)
# Recode the outcome column of the colon data as a factor (via character).
colon$status <- factor(as.character(colon$status))

# Only 5 simulations are requested so that this example runs quickly. In real
# use, 300 to 500 simulations are usually the minimum; more simulations give
# more reliable results, and the default of 2000 simulations should provide
# reasonably reliable results.
generic_input_parameters <- create_generic_input_parameters(
  general_title = "Prediction of colon cancer death",
  simulations = 5,
  simulations_per_file = 20,
  seed = 1,
  df = colon,
  outcome_name = "status",
  outcome_type = "time-to-event",
  outcome_time = "time",
  outcome_count = FALSE,
  verbose = FALSE
)$generic_input_parameters
# Table describing the five analyses to run, one row per analysis. Entries that
# do not apply to an analysis are NA and are written to the CSV as empty fields.
analysis_details <- data.frame(
  name = c(
    "age", "single_mandatory_predictor", "complex_models",
    "complex_models_only_optional_predictors", "predetermined_model_text"
  ),
  analysis_title = c(
    "Simple cut-off based on age", "Single mandatory predictor (rx)",
    "Multiple mandatory and optional predictors",
    "Multiple optional predictors only", "Predetermined model text"
  ),
  develop_model = c(FALSE, TRUE, TRUE, TRUE, TRUE),
  predetermined_model_text = c(
    NA, NA, NA, NA,
    "cph(Surv(time, status) ~ rx * age, data = df_training_complete, x = TRUE, y = TRUE)"
  ),
  mandatory_predictors = c(
    NA, "rx", "rx; differ; perfor; adhere; extent", NA, "rx; age"
  ),
  optional_predictors = c(NA, NA, "sex; age; nodes", "rx; differ; perfor", NA),
  mandatory_interactions = c(NA, NA, "rx; differ; extent", NA, NA),
  optional_interactions = c(
    NA, NA, "perfor; adhere; sex; age; nodes", "rx; differ", NA
  ),
  model_threshold_method = c(NA, "youden", "youden", "youden", "youden"),
  scoring_system = c("age", NA, NA, NA, NA),
  predetermined_threshold = c("60", NA, NA, NA, NA),
  higher_values_event = c(TRUE, NA, NA, NA, NA),
  stringsAsFactors = FALSE
)

# Build the output path once so the file that is written and the path that is
# passed on later cannot drift apart.
analysis_details_path <- file.path(tempdir(), "analysis_details.csv")
write.csv(analysis_details, analysis_details_path, row.names = FALSE, na = "")
# Create the analysis-specific parameters from the generic parameters and the
# analysis details file. verbose is TRUE by default; FALSE is passed here so
# that the outcome is not displayed.
results <- create_specific_input_parameters(
  generic_input_parameters = generic_input_parameters,
  analysis_details_path = analysis_details_path,
  verbose = FALSE
)
specific_input_parameters <- results$specific_input_parameters
# Seed the random number generator with the configured seed for reproducibility.
set.seed(generic_input_parameters$seed)

# Prepare the datasets using the settings held in the generic parameters.
prepared_datasets <- prepare_datasets(
  df = generic_input_parameters$df,
  simulations = generic_input_parameters$simulations,
  outcome_name = generic_input_parameters$outcome_name,
  outcome_type = generic_input_parameters$outcome_type,
  outcome_time = generic_input_parameters$outcome_time,
  verbose = FALSE
)
# There is usually no requirement to call this function directly; it is used by
# the perform_analysis function to create the actual and predicted values.
# Here the parameters of the first analysis are selected, the analysis is run
# on the prepared datasets, and the apparent performance is displayed.
specific_input_parameters_each_analysis <- specific_input_parameters[[1]]
results <- perform_analysis(
  generic_input_parameters,
  specific_input_parameters_each_analysis,
  prepared_datasets,
  verbose = FALSE
)
results$apparent_performance
# Run the code above in your browser using DataLab