roomba
This is a package to transform large, multi-nested lists into a more user-friendly format (i.e. a `tibble`) in `R`. The initial focus is on making processing of return values from `jsonlite::fromJSON()` queries more seamless, but ideally this package should be useful for deeply-nested lists from an array of sources.
- `roomba()` searches a deeply-nested list for names specified in `cols` (a character vector) and returns a `tibble` with the associated column titles. Nothing further about nesting hierarchy or depth need be specified.
- Handles empty values gracefully by substituting `NULL` values with `NA` or a user-specified value in `default`, or truncates lists appropriately.
- If you're only interested in sniffing out and replacing all `NULL`s, turn to the `replace_null()` function.
- Option to `keep` `any` or `all` data from the columns supplied.
Installation
You can install the development version from GitHub with:
# install.packages("devtools")
devtools::install_github("cstawitz/roomba")
Usage
Say we have some JSON from a pesky API.
library(roomba)
json <- '
{
"stuff": {
"buried": {
"deep": [
{
"location": "here",
"name": "Laura DeCicco",
"super_power": "fixing merge conflicts",
"other_secret_power": []
},
{
"location": "here",
"name": "Amanda Dobbyn",
"super_power": "flight",
"more_nested_stuff": 4
}
],
"alsodeep": 2342423234,
"stilldeep": {
"even_deeper": [
{
"location": "not here",
"name": "Jim Hester",
"super_power": []
},
{
"location": "here",
"name": "Christine Stawitz",
"super_power": "invisibility",
"more_nested_stuff": 5
},
{
"location": "here",
"name": "Isabella Velasquez",
"super_power": "teleportation"
}
]
}
}
}
}'
The JSON becomes a nested R list,
super_data <- json %>%
jsonlite::fromJSON(simplifyVector = FALSE)
from which we can pull data into the columns we want with `roomba`.
super_data %>%
roomba(cols = c("name", "super_power", "more_nested_stuff"), keep = any)
#> # A tibble: 5 x 3
#> name super_power more_nested_stuff
#> <chr> <chr> <int>
#> 1 Laura DeCicco fixing merge conflicts NA
#> 2 Amanda Dobbyn flight 4
#> 3 Jim Hester <NA> NA
#> 4 Christine Stawitz invisibility 5
#> 5 Isabella Velasquez teleportation NA
Let's try with a real-world Twitter example (see package data to use this data).
roomba(twitter_data, c("created_at", "name"))
#> # A tibble: 24 x 2
#> name created_at
#> <chr> <chr>
#> 1 Code for America Mon Aug 10 18:59:29 +0000 2009
#> 2 Ben Lorica 罗瑞卡 Mon Dec 22 22:06:18 +0000 2008
#> 3 Dan Sholler Thu Apr 03 20:09:24 +0000 2014
#> 4 Code for America Mon Aug 10 18:59:29 +0000 2009
#> 5 FiveThirtyEight Tue Jan 21 21:39:32 +0000 2014
#> 6 Digital Impact Wed Oct 07 21:10:53 +0000 2009
#> 7 Drew Williams Thu Aug 07 18:41:29 +0000 2014
#> 8 joe Fri May 29 13:25:25 +0000 2009
#> 9 Data Analysts 4 Good Wed May 07 16:55:33 +0000 2014
#> 10 Ryan Frederick Sun Mar 01 19:06:53 +0000 2009
#> # ... with 14 more rows
Shiny app included!
shiny_roomba()
What did that original data look like???
Feast your eyes on the original `super_data` list!
super_data
#> $stuff
#> $stuff$buried
#> $stuff$buried$deep
#> $stuff$buried$deep[[1]]
#> $stuff$buried$deep[[1]]$location
#> [1] "here"
#>
#> $stuff$buried$deep[[1]]$name
#> [1] "Laura DeCicco"
#>
#> $stuff$buried$deep[[1]]$super_power
#> [1] "fixing merge conflicts"
#>
#> $stuff$buried$deep[[1]]$other_secret_power
#> list()
#>
#>
#> $stuff$buried$deep[[2]]
#> $stuff$buried$deep[[2]]$location
#> [1] "here"
#>
#> $stuff$buried$deep[[2]]$name
#> [1] "Amanda Dobbyn"
#>
#> $stuff$buried$deep[[2]]$super_power
#> [1] "flight"
#>
#> $stuff$buried$deep[[2]]$more_nested_stuff
#> [1] 4
#>
#>
#>
#> $stuff$buried$alsodeep
#> [1] 2342423234
#>
#> $stuff$buried$stilldeep
#> $stuff$buried$stilldeep$even_deeper
#> $stuff$buried$stilldeep$even_deeper[[1]]
#> $stuff$buried$stilldeep$even_deeper[[1]]$location
#> [1] "not here"
#>
#> $stuff$buried$stilldeep$even_deeper[[1]]$name
#> [1] "Jim Hester"
#>
#> $stuff$buried$stilldeep$even_deeper[[1]]$super_power
#> list()
#>
#>
#> $stuff$buried$stilldeep$even_deeper[[2]]
#> $stuff$buried$stilldeep$even_deeper[[2]]$location
#> [1] "here"
#>
#> $stuff$buried$stilldeep$even_deeper[[2]]$name
#> [1] "Christine Stawitz"
#>
#> $stuff$buried$stilldeep$even_deeper[[2]]$super_power
#> [1] "invisibility"
#>
#> $stuff$buried$stilldeep$even_deeper[[2]]$more_nested_stuff
#> [1] 5
#>
#>
#> $stuff$buried$stilldeep$even_deeper[[3]]
#> $stuff$buried$stilldeep$even_deeper[[3]]$location
#> [1] "here"
#>
#> $stuff$buried$stilldeep$even_deeper[[3]]$name
#> [1] "Isabella Velasquez"
#>
#> $stuff$buried$stilldeep$even_deeper[[3]]$super_power
#> [1] "teleportation"
And just the first element of the `twitter` dataset