# `md1`: an example raw data set, one row per record (keyed by `oak_id`).
# MDBDR and MDEDR carry dates as day-month-year text, where "UN"/"UNK" stand
# in for unknown components; MDETM carries an hours:minutes:seconds time on
# the rows that have one.
md1 <- tibble::tribble(
  ~oak_id, ~raw_source, ~patient_number, ~MDBDR,        ~MDEDR,        ~MDETM,
  1L,      "MD1",       375,             NA,            NA,            NA,
  2L,      "MD1",       375,             "15-Sep-20",   NA,            NA,
  3L,      "MD1",       376,             "17-Feb-21",   "17-Feb-21",   NA,
  4L,      "MD1",       377,             "4-Oct-20",    NA,            NA,
  5L,      "MD1",       377,             "20-Jan-20",   "20-Jan-20",   "10:00:00",
  6L,      "MD1",       377,             "UN-UNK-2019", "UN-UNK-2019", NA,
  7L,      "MD1",       377,             "20-UNK-2019", "20-UNK-2019", NA,
  8L,      "MD1",       378,             "UN-UNK-2020", "UN-UNK-2020", NA,
  9L,      "MD1",       378,             "26-Jan-20",   "26-Jan-20",   "07:00:00",
  10L,     "MD1",       378,             "28-Jan-20",   "1-Feb-20",    NA,
  11L,     "MD1",       378,             "12-Feb-20",   "18-Feb-20",   NA,
  12L,     "MD1",       379,             "10-UNK-2020", "20-UNK-2020", NA,
  13L,     "MD1",       379,             NA,            NA,            NA,
  14L,     "MD1",       379,             NA,            "17-Feb-20",   NA
)
# Derive the variable CMSTDTC from MDBDR in the raw data set `md1`.
# Values are parsed as day-month-year (`raw_fmt = "d-m-y"`); the literals
# given in `raw_unk` (e.g. `"UN"` or `"UNK"`) flag date components whose
# value is missing/unknown.
cm1 <- assign_datetime(
  raw_dat = md1,
  raw_var = "MDBDR",
  raw_fmt = "d-m-y",
  raw_unk = c("UN", "UNK"),
  tgt_var = "CMSTDTC"
)
cm1

# Review the parsing failures recorded while deriving CMSTDTC.
problems(cm1$CMSTDTC)
# `cm_inter`: an example target data set with 14 records, carrying the
# identifier columns `oak_id`, `raw_source` and `patient_number` alongside
# the already-derived CMTRT and CMINDC.
cm_inter <- tibble::tibble(
  oak_id = seq_len(14L),
  raw_source = "MD1",
  # Records are grouped by patient: 2, 1, 4, 4 and 3 rows respectively.
  patient_number = rep(
    c(375, 376, 377, 378, 379),
    times = c(2L, 1L, 4L, 4L, 3L)
  ),
  CMTRT = c(
    "BABY ASPIRIN", "CORTISPORIN", "ASPIRIN", "DIPHENHYDRAMINE HCL",
    "PARCETEMOL", "VOMIKIND", "ZENFLOX OZ", "AMITRYPTYLINE", "BENADRYL",
    "DIPHENHYDRAMINE HYDROCHLORIDE", "TETRACYCLINE", "BENADRYL", "SOMINEX",
    "ZQUILL"
  ),
  CMINDC = c(
    "NA", "NAUSEA", "ANEMIA", "NAUSEA", "PYREXIA", "VOMITINGS", "DIARHHEA",
    "COLD", "FEVER", "LEG PAIN", "FEVER", "COLD", "COLD", "PAIN"
  )
)
# Same derivation of CMSTDTC from MDBDR as for `cm1`, but now the result is
# merged into the target data set `cm_inter` (passed as `tgt_dat`).
# `raw_unk` is spelled out so that this call matches the `cm1` derivation
# argument-for-argument instead of depending on the function's default.
cm2 <-
  assign_datetime(
    tgt_dat = cm_inter,
    tgt_var = "CMSTDTC",
    raw_dat = md1,
    raw_var = "MDBDR",
    raw_fmt = "d-m-y",
    raw_unk = c("UN", "UNK")
  )
cm2

# Inspect parsing failures associated with derivation of CMSTDTC.
problems(cm2$CMSTDTC)
# Derive CMSTDTC from two raw variables at once: MDEDR and MDETM.
# `raw_var` and `raw_fmt` are paired by position, so MDEDR is parsed with
# `"d-m-y"` and MDETM with `"H:M:S"`.
# NOTE(review): MDEDR/MDETM look like end date/time fields, yet the target
# is the start variable CMSTDTC -- confirm this pairing is intended.
cm3 <- assign_datetime(
  raw_dat = md1,
  raw_var = c("MDEDR", "MDETM"),
  raw_fmt = c("d-m-y", "H:M:S"),
  raw_unk = c("UN", "UNK"),
  tgt_var = "CMSTDTC"
)
cm3

# Review the parsing failures recorded while deriving CMSTDTC.
problems(cm3$CMSTDTC)
# Run the code above in your browser using DataLab