# Conversion from a SQLite DBI connection to a single parquet file:
dbi_connection <- DBI::dbConnect(RSQLite::SQLite(),
  system.file("extdata", "iris.sqlite", package = "parquetize"))
# Reading the iris table from the local SQLite database
# and converting it to a single parquet file:
dbi_to_parquet(
  conn = dbi_connection,
  sql_query = "SELECT * FROM iris",
  path_to_parquet = tempfile(fileext = ".parquet")
)
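# A quick way to check the result is to capture the output path and read it
# back with arrow (a sketch, assuming the arrow package is installed):
parquet_file <- tempfile(fileext = ".parquet")
dbi_to_parquet(
  conn = dbi_connection,
  sql_query = "SELECT * FROM iris",
  path_to_parquet = parquet_file
)
head(arrow::read_parquet(parquet_file))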
# Reading the iris table from the local SQLite database in chunks (using the
# `max_memory` argument, expressed in Mb, so 2 / 1024 is about 2 Kb and
# forces several small files) and converting it to multiple parquet files:
dbi_to_parquet(
  conn = dbi_connection,
  sql_query = "SELECT * FROM iris",
  path_to_parquet = tempdir(),
  max_memory = 2 / 1024
)
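# The resulting chunk files can be read back as a single arrow dataset. A
# minimal sketch, assuming the chunks are written to a dedicated directory
# so that arrow::open_dataset() only sees these parquet files:
chunk_dir <- file.path(tempdir(), "iris_chunks")
dir.create(chunk_dir, showWarnings = FALSE)
dbi_to_parquet(
  conn = dbi_connection,
  sql_query = "SELECT * FROM iris",
  path_to_parquet = chunk_dir,
  max_memory = 2 / 1024
)
nrow(dplyr::collect(arrow::open_dataset(chunk_dir)))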
# Chunking and partitioning cannot be combined directly, but they are easy
# to combine by hand:
# Reading the iris table from the local SQLite database in chunks (using the
# `max_memory` argument) and converting it to an arrow dataset partitioned
# by species.
# First, get the unique values of column "Species" from table "iris"
partitions <- get_partitions(dbi_connection, table = "iris", column = "Species")
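# For the iris data, partitions should contain the three species names:
# "setosa", "versicolor" and "virginica"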
# loop over those values
for (species in partitions) {
  dbi_to_parquet(
    conn = dbi_connection,
    # use glue_sql to build the query filtering on the partition value
    sql_query = glue::glue_sql("SELECT * FROM iris WHERE Species = {species}",
                               .con = dbi_connection),
    # add the partition name to the output dir to follow the hive-style
    # parquet partition layout (Species=<value>)
    path_to_parquet = file.path(tempdir(), "iris", paste0("Species=", species)),
    max_memory = 2 / 1024
  )
}
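# The Species=<value> directories form a hive-style partitioned dataset, so
# arrow can read them back with Species recovered as a partition column
# (a sketch, assuming the arrow and dplyr packages are installed):
ds <- arrow::open_dataset(file.path(tempdir(), "iris"))
dplyr::collect(dplyr::filter(ds, Species == "setosa"))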
# If you need a more complex query to derive your partitions, you can use
# dbGetQuery directly:
col_to_partition <- DBI::dbGetQuery(dbi_connection, "SELECT distinct(`Species`) FROM `iris`")[,1]
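# col_to_partition can then be used in the same loop as above.
# Finally, close the connection when you are done:
DBI::dbDisconnect(dbi_connection)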