# NOT RUN {
#******** EXAMPLE 1 *********
# Load the plpData object from its folder on disk.
plpData <- loadPlpData(
  file.path("C:", "User", "home", "data")
)
# Create the study population the models are developed on:
#   * a minimum of 365 days of observation prior to the at-risk start
#     (washoutPeriod),
#   * no prior outcome (removeSubjectsWithPriorOutcome),
#   * the person must be observed for 365 after index (minTimeAtRisk),
#   * a risk window running from 0 to 365 days after index.
population <- createStudyPopulation(
  plpData,
  outcomeId = 2042,
  firstExposureOnly = FALSE,
  washoutPeriod = 365,
  removeSubjectsWithPriorOutcome = TRUE,
  priorOutcomeLookback = 99999,
  requireTimeAtRisk = TRUE,
  minTimeAtRisk = 365,
  riskWindowStart = 0,
  addExposureDaysToStart = FALSE,
  riskWindowEnd = 365,
  addExposureDaysToEnd = FALSE
)
# Lasso logistic regression fitted on the study population created above
# (outcomeId 2042), using no feature selection and a time-split evaluation:
# 30% of the data goes to the test set and 70% to the train set, where the
# model hyper-parameters are selected using 3-fold cross validation.
# minCovariateFraction is set to 0.001.
# Results are saved to file.path('C:','User','myPredictionName').
model.lr <- lassoLogisticRegression.set()
mod.lr <- runPlp(
  population = population,
  plpData = plpData,
  minCovariateFraction = 0.001,
  modelSettings = model.lr,
  testSplit = "time",
  testFraction = 0.3,
  nfold = 3,
  # NULL: no pre-made split is supplied for this run
  indexes = NULL,
  saveDirectory = file.path("C:", "User", "myPredictionName"),
  verbosity = "INFO"
)
#******** EXAMPLE 2 *********
# Gradient boosting machine with a grid search to select hyper-parameters,
# using the test/train/folds created for the lasso logistic regression above
# (passed in through indexes = mod.lr$indexes).
# Results are saved to file.path('C:','User','myPredictionName2').
model.gbm <- gradientBoostingMachine.set(
  rsampRate = c(0.5, 0.9, 1),
  csampRate = 1,
  ntrees = c(10, 100),
  # spelled out as FALSE/TRUE: the shorthands F and T are ordinary variables
  # that can be reassigned, so they are not safe in example code
  bal = c(FALSE, TRUE),
  max_depth = c(4, 5),
  learn_rate = c(0.1, 0.01)
)
mod.gbm <- runPlp(
  population = population,
  plpData = plpData,
  modelSettings = model.gbm,
  testSplit = "time",
  testFraction = 0.3,
  nfold = 3,
  # NOTE(review): testSplit/testFraction/nfold are still passed alongside
  # indexes; presumably the supplied indexes take precedence - confirm
  indexes = mod.lr$indexes,
  saveDirectory = file.path("C:", "User", "myPredictionName2")
)
# }
# Run the code above in your browser using DataLab