# Example data ----
# Small demonstration data frame. The custom row names are supplied directly
# at construction time so that the `rows` argument can be shown further down.
# NOTE(review): interview_date deliberately mixes "-" and "/" separators and
# includes an NA; presumably sift() normalises date formats -- confirm against
# the sift() documentation.
sample_df <- data.frame(
  record_id        = c("REC001", "REC002", "REC003", "REC004"),
  src_subject_id   = c("SUB001", "SUB002", "SUB003", "SUB004"),
  subjectkey       = c("KEY001", "KEY002", "KEY003", "KEY004"),
  site             = c("Yale", "NU", "Yale", "NU"),
  phenotype        = c("A", "B", "A", "C"),
  visit            = c(1, 2, 2, 1),
  state            = c("complete", "completed baseline", "in progress", NA),
  status           = c(NA, NA, NA, "complete"),
  lost_to_followup = c(FALSE, FALSE, TRUE, NA),
  interview_date   = c("2023-01-15", "2023/02/20", NA, "2023-03-10"),
  row.names        = c("foo", "bar", "baz", "qux")
)

# Single-criterion and combined calls ----
# sift() is provided outside this script; the descriptions below restate the
# intent of each call rather than guaranteeing sift()'s exact behaviour.

# Visit plus a specific interview date, restricted to two columns.
filtered1 <- sift(
  sample_df,
  cols = c("src_subject_id", "phenotype"),
  visit = 2,
  interview_date = "01/31/2023"
)

# interview_date = TRUE: include only rows with non-NA interview dates.
filtered2 <- sift(
  sample_df,
  interview_date = TRUE
)

# Status values (intended to work with either the state or status column).
filtered3 <- sift(
  sample_df,
  status = c("complete", "completed baseline")
)

# Specific row names.
filtered4 <- sift(
  sample_df,
  rows = c("foo", "qux")
)

# A vector of visit values.
filtered6 <- sift(
  sample_df,
  visit = c(1, 2)
)

# The lost_to_followup flag.
filtered10 <- sift(
  sample_df,
  lost_to_followup = FALSE
)

# A set of subject identifiers.
filtered11 <- sift(
  sample_df,
  src_subject_id = c("SUB001", "SUB004")
)

# Several criteria at once, together with a column selection.
filtered12 <- sift(
  sample_df,
  site = "Yale",
  visit = 1,
  cols = c("record_id", "src_subject_id", "site")
)
# Run the code above in your browser using DataLab