# NOT RUN {
## A Simple Introduction --------------------------------------
head(iris)
collap(iris, ~ Species)                                        # Default: FUN = fmean for numeric
collapv(iris, 5)                                               # Same using collapv
collap(iris, ~ Species, fmedian)                               # Using the median
collap(iris, ~ Species, fmedian, keep.col.order = FALSE)       # Groups in-front
collap(iris, Sepal.Width + Petal.Width ~ Species, fmedian)     # Only '.Width' columns
collapv(iris, 5, cols = c(2, 4))                               # Same using collapv
collap(iris, ~ Species, list(fmean, fmedian))                  # Two functions
collap(iris, ~ Species, list(fmean, fmedian), return = "long") # Long format
collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4))    # Custom aggregation
collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4),    # Raw output, no column reordering
        return = "list")
collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4),    # A strange choice..
        return = "long")
collap(iris, ~ Species, w = ~ Sepal.Length)                    # Using Sepal.Length as weights, ..
weights <- abs(rnorm(fnrow(iris)))
collap(iris, ~ Species, w = weights)                           # Some random weights..
collap(iris, iris$Species, w = weights)                        # Note this behavior..
collap(iris, iris$Species, w = weights,
       keep.by = FALSE, keep.w = FALSE)
# }
# NOT RUN {
<!-- % \donttest{iris |> fgroup_by(Species) |> collapg()}                        # dplyr style, but faster -->
# }
# NOT RUN {
## Multi-Type Aggregation --------------------------------------
head(wlddev)                                                    # World Development Panel Data
head(collap(wlddev, ~ country + decade))                        # Aggregate by country and decade
head(collap(wlddev, ~ country + decade, fmedian, ffirst))       # Different functions
head(collap(wlddev, ~ country + decade, cols = is.numeric))     # Aggregate only numeric columns
head(collap(wlddev, ~ country + decade, cols = 9:13))           # Only the 5 series
head(collap(wlddev, PCGDP + LIFEEX ~ country + decade))         # Only GDP and life-expactancy
head(collap(wlddev, PCGDP + LIFEEX ~ country + decade, fsum))   # Using the sum instead
head(collap(wlddev, PCGDP + LIFEEX ~ country + decade, sum,     # Same using base::sum -> slower!
            na.rm = TRUE))
head(collap(wlddev, wlddev[c("country","decade")], fsum,        # Same, exploring different inputs
            cols = 9:10))
head(collap(wlddev[9:10], wlddev[c("country","decade")], fsum))
head(collapv(wlddev, c("country","decade"), fsum))              # ..names/indices with collapv
head(collapv(wlddev, c(1,5), fsum))
g <- GRP(wlddev, ~ country + decade)                            # Precomputing the grouping
head(collap(wlddev, g, keep.by = FALSE))                        # This is slightly faster now
# Aggregate categorical data using not the mode but the last element
head(collap(wlddev, ~ country + decade, fmean, flast))
head(collap(wlddev, ~ country + decade, catFUN = flast,         # Aggregate only categorical data
            cols = is_categorical))
## Weighted Aggregation ----------------------------------------
# We aggregate to region level using population weights
head(collap(wlddev, ~ region + year, w = ~ POP))                # Takes weighted mean for numeric..
# ..and weighted mode for categorical data. The weight vector is aggregated using fsum
head(collap(wlddev, ~ region + year, w = ~ POP,                 # Aggregating weights using sum
            wFUN = list(fsum, fmax)))                           # and max (corresponding to mode)
## Multi-Function Aggregation ----------------------------------
head(collap(wlddev, ~ country + decade, list(fmean, fnobs),     # Saving mean and Nobs
            cols = 9:13))
head(collap(wlddev, ~ country + decade,                         # Same using base R -> slower
            list(mean = mean,
                 Nobs = function(x, 
# }
# NOT RUN {
…
# }
# NOT RUN {
) sum(!is.na(x))),
            cols = 9:13, na.rm = TRUE))
lapply(collap(wlddev, ~ country + decade,                       # List output format
            list(fmean, fnobs), cols = 9:13, return = "list"), head)
head(collap(wlddev, ~ country + decade,                         # Long output format
            list(fmean, fnobs), cols = 9:13, return = "long"))
head(collap(wlddev, ~ country + decade,                         # Also aggregating categorical data,
            list(fmean, fnobs), return = "long_dupl"))          # and duplicating it 2 times
head(collap(wlddev, ~ country + decade,                         # Now also using 2 functions on
            list(fmean, fnobs), list(fmode, flast),             # categorical data
            keep.col.order = FALSE))
head(collap(wlddev, ~ country + decade,                         # More functions, string input,
            c("fmean","fsum","fnobs","fsd","fvar"),             # parallelized execution
            c("fmode","ffirst","flast","fndistinct"),           # (choose more than 1 cores,
            parallel = TRUE, mc.cores = 1L,                     # depending on your machine)
            keep.col.order = FALSE))
## Custom Aggregation ------------------------------------------
head(collap(wlddev, ~ country + decade,                         # Custom aggregation
            custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8)))
head(collap(wlddev, ~ country + decade,                         # Using column names
            custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"),
                          flast = "date")))
head(collap(wlddev, ~ country + decade,                         # Weighted parallelized custom
            custom = list(fmean = 9:12, fsd = 9:10,             # aggregation
                          fmode = 7:8), w = ~ POP,
            wFUN = list(fsum, fmax),
            parallel = TRUE, mc.cores = 1L))
head(collap(wlddev, ~ country + decade,                         # No column reordering
            custom = list(fmean = 9:12, fsd = 9:10,
                          fmode = 7:8), w = ~ POP,
            wFUN = list(fsum, fmax),
            parallel = TRUE, mc.cores = 1L, keep.col.order = FALSE))
# }
# NOT RUN {
<!-- % \donttest{ -->
# }
# NOT RUN {
## Piped Use --------------------------------------------------
library(magrittr) # Note: Used because |> is not available on older R versions
iris %>% fgroup_by(Species) %>% collapg()
wlddev %>% fgroup_by(country, decade) %>% collapg() %>% head()
wlddev %>% fgroup_by(region, year) %>% collapg(w = POP) %>% head()
wlddev %>% fgroup_by(country, decade) %>% collapg(fmedian, flast) %>% head()
wlddev %>% fgroup_by(country, decade) %>%
  collapg(custom = list(fmean = 9:12, fmode = 5:7, flast = 3)) %>% head()
# }
# NOT RUN {
<!-- % } -->
# }
Run the code above in your browser using DataLab