# Four short example "documents" with deliberately contrasting topics
# (cats vs. cars), used below to build a document-term matrix.
# Each element is one document; the "Oh year?" typo is part of the
# original example text and is kept as-is.
text <- c(
  "Hey, I like kittens. I think all kinds of cats really are just the best pet ever.",
  "Oh year? Well I really like cars. All the wheels and the turbos... I think that's the best ever.",
  "You know what? Poo on you. Cats, dogs, rabbits -- you know, living creatures... to think you'd care about anything else!",
  "You can stick to your opinion. You can be wrong if you want. You know what life's about? Supercharging, diesel guzzling, exhaust spewing, piston moving ignitions."
)
# build a document-term matrix from the example documents
# NOTE(review): lma_dtm is from the lingmatch package; presumably rows are
# documents and columns are terms -- confirm against its documentation
dtm <- lma_dtm(text)
# calculate a latent semantic space from the example text
lss <- lma_lspace(dtm)
# show that document similarities between the truncated and full space are the same
# keep.dim = TRUE presumably retains all dimensions ("full" space), while
# passing lss as the second argument maps dtm into the truncated space --
# verify both in the lma_lspace help page
spaces <- list(
full = lma_lspace(dtm, keep.dim = TRUE),
truncated = lma_lspace(dtm, lss)
)
# cosine similarities between documents should match across the two spaces
sapply(spaces, lma_simets, metric = "cosine")
# The following demonstrates use of pretrained spaces; it is wrapped in
# if (FALSE) so it is never run automatically (it requires downloaded
# space files on disk).
if (FALSE) {
# specify a directory containing spaces,
# or where you would like to download spaces
space_dir <- "~/Latent Semantic Spaces"
# map to a pretrained space
# NOTE(review): "100k" presumably names a downloadable pretrained space;
# confirm available space names in the lingmatch documentation
ddm <- lma_lspace(dtm, "100k", dir = space_dir)
# load the matching subset of the space
# without mapping
lss_100k_part <- lma_lspace(colnames(dtm), "100k", dir = space_dir)
## or
# map.space = FALSE apparently loads the subset without projecting dtm --
# verify against the lma_lspace argument documentation
lss_100k_part <- lma_lspace(dtm, "100k", map.space = FALSE, dir = space_dir)
# load the full space
lss_100k <- lma_lspace("100k", dir = space_dir)
## or
lss_100k <- lma_lspace(space = "100k", dir = space_dir)
}
# Run the code above in your browser using DataLab