# NOT RUN {
# Dataframe used throughout this doc
df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1), shuffle(tmp$v1)))
head(select(tmp, array_max(tmp$v1), array_min(tmp$v1), array_distinct(tmp$v1)))
head(select(tmp, array_position(tmp$v1, 21), array_repeat(df$mpg, 3), array_sort(tmp$v1)))
head(select(tmp, flatten(tmp$v1), reverse(tmp$v1), array_remove(tmp$v1, 21)))
tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
head(tmp2)
head(select(tmp, posexplode(tmp$v1)))
head(select(tmp, slice(tmp$v1, 2L, 2L)))
head(select(tmp, sort_array(tmp$v1)))
head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
head(select(tmp3, map_keys(tmp3$v3), map_values(tmp3$v3)))
head(select(tmp3, element_at(tmp3$v3, "Valiant")))
tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp))
head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5)))
head(select(tmp4, array_except(tmp4$v4, tmp4$v5), array_intersect(tmp4$v4, tmp4$v5)))
head(select(tmp4, array_union(tmp4$v4, tmp4$v5)))
head(select(tmp4, arrays_zip(tmp4$v4, tmp4$v5), map_from_arrays(tmp4$v4, tmp4$v5)))
head(select(tmp, concat(df$mpg, df$cyl, df$hp)))
tmp5 <- mutate(df, v6 = create_array(df$model, df$model))
head(select(tmp5, array_join(tmp5$v6, "#"), array_join(tmp5$v6, "#", "NULL")))
# }
# NOT RUN {
# }
# NOT RUN {
# Converts a struct into a JSON object
df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
# Converts an array of structs into a JSON array
df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
# Converts a map into a JSON object
df2 <- sql("SELECT map('name', 'Bob')) as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
# Converts an array of maps into a JSON array
df2 <- sql("SELECT array(map('name', 'Bob'), map('name', 'Alice')) as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
# }
# NOT RUN {
# }
# NOT RUN {
df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
schema <- structType(structField("date", "string"))
head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
df2 <- sql("SELECT named_struct('name', 'Bob') as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
schema <- structType(structField("name", "string"))
head(select(df2, from_json(df2$people_json, schema)))
head(select(df2, from_json(df2$people_json, "name STRING")))
# }
# NOT RUN {
# }
# NOT RUN {
df2 <- createDataFrame(data.frame(
id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
))
head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))
# }
Run the code above in your browser using DataLab