# Example: process a large fst file chunk by chunk.
# The `if (FALSE)` guard keeps this example from executing when the file is
# sourced; run the body manually to try it out.
if (FALSE) {
  # Generate a random data frame with 10 million rows and various column types
  nr_of_rows <- 1e7
  df <- data.frame(
    Logical = sample(
      c(TRUE, FALSE, NA),
      size = nr_of_rows,
      replace = TRUE,
      prob = c(0.85, 0.1, 0.05)
    ),
    Integer = sample(1L:100L, nr_of_rows, replace = TRUE),
    # Draw 20 values from 0.01..100.00, then resample them to full length
    Real = sample(sample(1:10000, 20) / 100, nr_of_rows, replace = TRUE),
    # Factor levels come from the labels of the UScitiesD object
    Factor = as.factor(sample(labels(UScitiesD), nr_of_rows, replace = TRUE))
  )

  # Write the data frame to a temporary fst file on disk
  fst_file <- tempfile(fileext = ".fst")
  write_fst(df, fst_file)

  # Get the size of 10000 rows
  get_fst_chunk_size(fst_file, 1e4)

  # Filter all rows where Integer == 7, processing the file chunk by chunk.
  # NOTE(review): the bare column name inside `[` relies on data.table-style
  # subsetting; this assumes each chunk is passed to `chunk_f` as a
  # data.table -- confirm against the import_fst_chunked documentation.
  sevens <- import_fst_chunked(fst_file, chunk_f = \(x) x[Integer == 7])
  print(sevens)

  # Remove the temporary file so the example leaves nothing behind
  unlink(fst_file)
}
# Run the code above in your browser using DataLab