# NOT RUN {
## A Simple Introduction --------------------------------------
# Basic calls to collap() / collapv() on the built-in iris data, grouped by Species.
head(iris)
collap(iris, ~ Species)                                        # Default: FUN = fmean for numeric
collapv(iris, 5)                                               # Same using collapv
collap(iris, ~ Species, fmedian)                               # Using the median
collap(iris, ~ Species, fmedian, keep.col.order = FALSE)       # Grouping columns in front
collap(iris, Sepal.Width + Petal.Width ~ Species, fmedian)     # Only the '.Width' columns
collapv(iris, 5, cols = c(2, 4))                               # Same columns using collapv (no FUN given here)
collap(iris, ~ Species, list(fmean, fmedian))                  # Two functions
collap(iris, ~ Species, list(fmean, fmedian), return = "long") # Long format
collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4))    # Custom aggregation: one function per column set
collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4),    # Raw output, no column reordering
        return = "list")
collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4),    # Long format with custom input: a strange choice...
        return = "long")
# Weighted aggregation on iris: weights given as a column (formula) or as an external vector.
collap(iris, ~ Species, w = ~ Sepal.Length)                    # Using Sepal.Length as weights, ..
set.seed(101)                                                  # Fix the RNG so the example output is reproducible
weights <- abs(rnorm(fnrow(iris)))                             # One non-negative random weight per row
collap(iris, ~ Species, w = weights)                           # Some random weights..
# NOTE(review): here both the grouping and the weights are passed as vectors rather
# than as formulas; compare the output with the formula-based calls above.
collap(iris, iris$Species, w = weights)                        # Note this behavior...
collap(iris, iris$Species, w = weights,                        # Same call with keep.by and keep.w switched off
       keep.by = FALSE, keep.w = FALSE)
# Piped interface: group with fgroup_by(), then aggregate with collapg.
library(dplyr) # Needed for "%>%"
iris %>% fgroup_by(Species) %>% collapg                        # dplyr style, but faster
## Multi-Type Aggregation --------------------------------------
# Aggregating a data set that mixes numeric and categorical columns.
head(wlddev)                                                    # World Development Panel Data
head(collap(wlddev, ~ country + decade))                        # Aggregate by country and decade
head(collap(wlddev, ~ country + decade, fmedian, ffirst))       # Different functions
head(collap(wlddev, ~ country + decade, cols = is.numeric))     # Aggregate only numeric columns
head(collap(wlddev, ~ country + decade, cols = 9:12))           # Only the 4 series
head(collap(wlddev, PCGDP + LIFEEX ~ country + decade))         # Only GDP and life expectancy
head(collap(wlddev, PCGDP + LIFEEX ~ country + decade, fsum))   # Using the sum instead
head(collap(wlddev, PCGDP + LIFEEX ~ country + decade, sum,     # Same using base::sum -> slower!!
            na.rm = TRUE))
head(collap(wlddev, wlddev[c("country","decade")], fsum,        # Same, exploring different inputs:
            cols = 9:10))                                       # grouping columns passed as a data frame
head(collap(wlddev[9:10], wlddev[c("country","decade")], fsum)) # ... data subset passed directly
head(collapv(wlddev, c("country","decade"), fsum))              # ... names/indices with collapv
head(collapv(wlddev, c(1,5), fsum))
g <- GRP(wlddev, ~ country + decade)                            # Precomputing the grouping
head(collap(wlddev, g, keep.by = FALSE))                        # This is slightly faster now
# Aggregate categorical data using not the mode but the last element
head(collap(wlddev, ~ country + decade, fmean, flast))
head(collap(wlddev, ~ country + decade, catFUN = flast,         # Aggregate only categorical data
            cols = is.categorical))
## Weighted aggregation ----------------------------------------
# Weights are supplied first as an external vector, then as a column of the data via a formula.
# `weights` is also used by the custom-aggregation examples further down.
set.seed(101)                                                   # Fix the RNG so the example output is reproducible
weights <- abs(rnorm(fnrow(wlddev)))                            # Random weight vector
head(collap(wlddev, ~ country + decade, w = weights))           # Takes weighted mean for numeric..
# ..and weighted mode for categorical data. The weight vector is aggregated using fsum
wlddev$weights <- weights                                       # Adding to data
head(collap(wlddev, ~ country + decade, w = ~ weights))         # Keeps column order
head(collap(wlddev, ~ country + decade, w = ~ weights,          # Aggregating weights using sum
            wFUN = list(fsum, fmax)))                           # and max (corresponding to mode)
wlddev$weights <- NULL                                          # Remove the temporary column again
## Multi-Function Aggregation ----------------------------------
# Applying several functions at once, with the different `return` formats.
head(collap(wlddev, ~ country + decade, list(fmean, fNobs),     # Saving mean and Nobs
            cols = 9:12))
head(collap(wlddev, ~ country + decade,                         # Same using base R -> slower
            list(mean = mean,
                 Nobs = function(x,...) sum(!is.na(x))),        # Counts the non-missing values
            cols = 9:12, na.rm = TRUE))
head(collap(wlddev, ~ country + decade,                         # List output format
            list(fmean, fNobs), cols = 9:12, return = "list"))
head(collap(wlddev, ~ country + decade,                         # Long output format
            list(fmean, fNobs), cols = 9:12, return = "long"))
head(collap(wlddev, ~ country + decade,                         # Also aggregating categorical data,
            list(fmean, fNobs), return = "long_dupl"))          # and duplicating it 2 times
head(collap(wlddev, ~ country + decade,                         # Now also using 2 functions on
            list(fmean, fNobs), list(fmode, flast),             # categorical data
            keep.col.order = FALSE))
head(collap(wlddev, ~ country + decade,                         # More functions, string input,
            c("fmean","fsum","fNobs","fsd","fvar"),             # parallelized execution
            c("fmode","ffirst","flast","fNdistinct"),           # (choose more than 1 core,
            parallel = TRUE, mc.cores = 1L,                     # depending on your machine)
            keep.col.order = FALSE))
## Custom Aggregation ------------------------------------------
# `custom` maps each function name to the columns (indices or names) it is applied to.
# The weighted calls below use the `weights` vector created in the
# "Weighted aggregation" section above.
head(collap(wlddev, ~ country + decade,                         # Custom aggregation
            custom = list(fmean = 9:12, fsd = 9:10, fmode = 7:8)))
head(collap(wlddev, ~ country + decade,                         # Using column names
            custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"),
                          flast = "date")))
head(collap(wlddev, ~ country + decade,                         # Weighted parallelized custom
            custom = list(fmean = 9:12, fsd = 9:10,             # aggregation
                          fmode = 7:8), w = weights,
            wFUN = list(fsum, fmax),
            parallel = TRUE, mc.cores = 1L))
head(collap(wlddev, ~ country + decade,                         # No column reordering
            custom = list(fmean = 9:12, fsd = 9:10,
                          fmode = 7:8), w = weights,
            wFUN = list(fsum, fmax),
            parallel = TRUE, mc.cores = 1L, keep.col.order = FALSE))
## Piped use --------------------------------------------------
# Same aggregations via fgroup_by() + collapg; `%>%` comes from the
# library(dplyr) call in the introduction section.
wlddev %>% fgroup_by(country, decade) %>% collapg                    # Default aggregation
wlddev %>% fgroup_by(country, decade) %>% collapg(w = ODA)           # ODA column passed as weights
wlddev %>% fgroup_by(country, decade) %>% collapg(fmedian, flast)    # Different functions
wlddev %>% fgroup_by(country, decade) %>%                            # Custom aggregation
  collapg(custom = list(fmean = 9:12, fmode = 5:7, flast = 3))
# }
# Run the code above in your browser using DataLab