# NOT RUN {
# Bulk-load the example PLOS data file that ships with the elastic package
plos_file <- system.file("examples", "plos_data.json", package = "elastic")
docs_bulk(plos_file)
# List the aliases, delete the "plos" index, then list the aliases again
aliases_get()
index_delete("plos")
aliases_get()
# Curl options
## httr's verbose() config is handed to docs_bulk() via the `config` argument
library("httr")
verbose_cfg <- verbose()
plos_json <- system.file("examples", "plos_data.json", package = "elastic")
docs_bulk(plos_json, config = verbose_cfg)
# From a data.frame
docs_bulk(x = mtcars, index = "hello", type = "world")
## field names cannot contain dots, so swap them for underscores first
dotless_names <- gsub("\\.", "_", names(iris))
names(iris) <- dotless_names
docs_bulk(x = iris, index = "iris", type = "flowers")
## type can be missing, but index can not
docs_bulk(x = iris, index = "flowers")
## big data.frame, 53K rows, load ggplot2 package first
# res <- docs_bulk(diamonds, "diam")
# Search("diam")$hits$total
# From a list
## apply(<data.frame>, 1, as.list) yields one list element per row
iris_rows <- apply(iris, 1, as.list)
docs_bulk(iris_rows, index = "iris", type = "flowers")
arrest_rows <- apply(USArrests, 1, as.list)
docs_bulk(arrest_rows, index = "arrests")
# dim_list <- apply(diamonds, 1, as.list)
# out <- docs_bulk(dim_list, index="diamfromlist")
# When using in a loop
## The last _id counter is looked up internally to know where the next bulk
## insert should start, but you need to sleep in between docs_bulk calls;
## the bigger the data, the longer the sleep
csv_names <- c("test1.csv", "test2.csv", "test3.csv")
files <- vapply(
  csv_names,
  function(nm) system.file("examples", nm, package = "elastic"),
  character(1),
  USE.NAMES = FALSE
)
for (path in files) {
  chunk <- read.csv(path)
  docs_bulk(chunk, index = "testes", type = "docs")
  # pause between consecutive bulk inserts (see note above)
  Sys.sleep(1)
}
count(index = "testes", type = "docs")
index_delete("testes")
# You can include your own document id numbers
## Either pass in as an argument
index_create("testes")
files <- c(
  system.file("examples", "test1.csv", package = "elastic"),
  system.file("examples", "test2.csv", package = "elastic"),
  system.file("examples", "test3.csv", package = "elastic")
)
## Number of rows (i.e. documents) in each file, kept as integers so the
## generated ids stay integers as well
tt <- vapply(files, function(z) NROW(read.csv(z)), integer(1),
  USE.NAMES = FALSE)
## Give each file its own consecutive, non-overlapping run of ids: file k
## starts right after the total row count of files 1..(k - 1). Works for any
## number of files, and seq_len() yields an empty id vector (rather than a
## descending range such as 1:0) for a file with zero rows.
offsets <- cumsum(tt) - tt
ids <- Map(function(offset, n) offset + seq_len(n), offsets, tt)
for (i in seq_along(files)) {
  d <- read.csv(files[[i]])
  ## es_ids = FALSE together with doc_ids: use the ids supplied here
  docs_bulk(d, index = "testes", type = "docs", doc_ids = ids[[i]],
    es_ids = FALSE)
}
count("testes", "docs")
index_delete("testes")
## or include in the input data
### from data.frame's
index_create("testes")
files <- c(
  system.file("examples", "test1_id.csv", package = "elastic"),
  system.file("examples", "test2_id.csv", package = "elastic"),
  system.file("examples", "test3_id.csv", package = "elastic")
)
# peek at the raw contents of the first file
readLines(files[[1]])
for (path in files) {
  docs_bulk(read.csv(path), index = "testes", type = "docs")
}
count(index = "testes", type = "docs")
index_delete("testes")
### from lists via file inputs
index_create("testes")
for (path in files) {
  # read the file, then turn every row into its own list element
  rows <- apply(read.csv(path), 1, as.list)
  docs_bulk(rows, index = "testes", type = "docs")
}
count(index = "testes", type = "docs")
index_delete("testes")
# data.frame's with a single column
## this used to fail, but should work now
# random 10-letter index name
index_name <- paste0(sample(letters, size = 10), collapse = "")
index_create(index_name)
single_col <- data.frame(foo = 1:10)
out <- docs_bulk(x = single_col, index = index_name)
count(index_name)
index_delete(index_name)
# }
# Run the code above in your browser using DataLab