## Not run:
# # input data on HDFS
# d <- ddf(hdfsConn("/path/to/big/data/on/hdfs"))
#
# # set RHIPE / Hadoop parameters
# # buffer sizes control how many k/v pairs are sent to map / reduce tasks at a time
# # mapred.reduce.tasks is a Hadoop config parameter that controls the number of reduce tasks
# rhctl <- rhipeControl(mapred = list(
# rhipe_map_buff_size = 10000,
# mapred.reduce.tasks = 72,
# rhipe_reduce_buff_size = 1))
#
# # divide input data using these control parameters
# divide(d, by = "var", output = hdfsConn("/path/to/output"), control = rhctl)
# ## End(Not run)
Run the code above in your browser using DataLab