nail_condes: Interpret a continuous latent variable

Description

Generate an LLM response to analyze a continuous latent variable.

Usage

nail_condes(
  dataset,
  num.var,
  introduction = NULL,
  request = NULL,
  model = "llama3",
  quanti.threshold = 0,
  quanti.cat = c("Significantly above average", "Significantly below average", "Average"),
  weights = NULL,
  proba = 0.05,
  generate = FALSE
)

Value

A data frame containing the LLM's prompt and response (if generate = TRUE).

Arguments

dataset: a data frame made up of at least one quantitative variable and a set of quantitative variables and/or categorical variables.
num.var: the index of the variable to be characterized.
introduction: the introduction for the LLM prompt.
request: the request made to the LLM.
model: the model name ('llama3' by default).
quanti.threshold: the threshold above (resp. below) which a scaled variable is considered significantly above (resp. below) the average. Used when converting continuous variables to categorical ones.
quanti.cat: a vector of the 3 possible categories for continuous variables converted to categorical ones according to the threshold, in the order: above, below, average. Default is "Significantly above average", "Significantly below average" and "Average".
weights: weights for the individuals (see FactoMineR::condes()).
proba: the significance threshold considered to characterize the variable (by default 0.05).
generate: a boolean that indicates whether to generate the LLM response. If FALSE, the function only returns the prompt.

Details

This function sends a prompt directly to an LLM. Therefore, to get a consistent answer, we highly recommend customizing the 'introduction' and 'request' parameters and adding all relevant information about your data for the LLM. We also recommend renaming the columns with clear, unabbreviated and unambiguous names.

Examples

Run this code

if (FALSE) {
# Processing time is often longer than ten seconds
# because the function uses a large language model.
#
# Every example follows the same steps:
#   1. run a factorial analysis (PCA, MCA or CA) with FactoMineR;
#   2. take the coordinates on the first dimension and bind them in front
#      of the original data, so the variable to characterize is column 1;
#   3. write an introduction (and optionally a request) for the prompt;
#   4. call nail_condes() with num.var = 1.

### Example 1: decathlon dataset ###

library(FactoMineR)
data(decathlon)

# Replace the column names with explicit descriptions: these names are
# what the LLM gets to read, so they should be unambiguous.
names(decathlon) <- c('Time taken to complete the 100m',
'Distance reached for the long jump',
'Distance reached for the shot put',
'Height reached for the high jump',
'Time taken to complete the 400m',
'Time taken to complete the 110m hurdle',
'Distance reached for the discus',
'Height reached for the pole vault',
'Distance reached for the javeline',
'Time taken to complete the 1500 m',
'Rank/Counter-performance indicator',
'Points', 'Competition')

# PCA with columns 11:12 as supplementary quantitative variables and
# column 13 as a supplementary categorical variable; graph = FALSE, the
# plot is requested explicitly on the next line.
res_pca_deca <- FactoMineR::PCA(decathlon,
quanti.sup = 11:12, quali.sup = 13, graph = FALSE)
plot.PCA(res_pca_deca, choix = 'var')
# Coordinates of the individuals, then first dimension + original data.
# NOTE(review): the bound column is passed unnamed, so cbind() derives its
# name from the expression -- confirm that name reads well in the prompt.
deca_work <- res_pca_deca$ind$coord |> as.data.frame()
deca_work <- deca_work[,1] |> cbind(decathlon)

# The introduction is written over several lines for readability, then
# collapsed to a single line (newlines replaced, whitespace squished).
intro_deca <- "A study was led on athletes
participating in a decathlon event.
Their performance was assessed on each part of the decathlon,
and they were all placed on an unidimensional scale."
intro_deca <- gsub('\n', ' ', intro_deca) |>
stringr::str_squish()

# Characterize column 1 with a custom threshold and custom category labels.
# generate = TRUE asks for the LLM response, not only the prompt.
res_deca <- nail_condes(deca_work,
                        num.var = 1,
                        quanti.threshold = 1,
                        quanti.cat = c('High', 'Low', 'Average'),
                        introduction = intro_deca,
                        generate = TRUE)

# Print the LLM response.
cat(res_deca$response)


### Example 2: agri_studies dataset ###

data(agri_studies)

# Fix the random seed before the analysis.
# NOTE(review): presumably needed because of level.ventil -- confirm.
set.seed(1)
# MCA with columns 39:42 as supplementary categorical variables.
res_mca_agri <- FactoMineR::MCA(agri_studies, quali.sup = 39:42,
level.ventil = 0.05, graph = FALSE)
# Plot the individuals only: variables and supplementary categories are
# hidden and no labels are drawn.
plot.MCA(res_mca_agri, choix = 'ind',
invisible = c('var', 'quali.sup'), label = 'none')

# First-dimension coordinates of the individuals + original data.
agri_work <- res_mca_agri$ind$coord |> as.data.frame()
agri_work <- agri_work[,1] |> cbind(agri_studies)

# Multi-line introduction, collapsed to a single line below.
intro_agri <- "These data were collected after a survey
on students' expectations of agribusiness studies.
Participants had to rank how much they agreed with 38 statements
about possible benefits from agribusiness studies;
then, they were asked personal questions."
intro_agri <- gsub('\n', ' ', intro_agri) |>
stringr::str_squish()

# Default threshold and categories; only the introduction is customized.
res_agri <- nail_condes(agri_work,
                        num.var = 1,
                        introduction = intro_agri,
                        generate = TRUE)

# Print the LLM response.
cat(res_agri$response)

### Example 3: glossophobia dataset ###

data(glossophobia)

# Fix the random seed before the analysis.
set.seed(1)
# MCA with columns 26:41 as supplementary categorical variables.
res_mca_phobia <- FactoMineR::MCA(glossophobia,
quali.sup = 26:41, level.ventil = 0.05, graph = FALSE)
# Plot the individuals only, without labels.
plot.MCA(res_mca_phobia, choix = 'ind',
invisible = c('var', 'quali.sup'), label = 'none')

# First-dimension coordinates of the individuals + original data.
phobia_work <- res_mca_phobia$ind$coord |> as.data.frame()
phobia_work <- phobia_work[,1] |> cbind(glossophobia)

# Multi-line introduction, collapsed to a single line below.
intro_phobia <- "These data were collected after a survey
on participants' feelings about speaking in public.
Participants had to rank how much they agreed with
25 descriptions of speaking in public;
then, they were asked personal questions."
intro_phobia <- gsub('\n', ' ', intro_phobia) |>
stringr::str_squish()

# Default threshold and categories; only the introduction is customized.
res_phobia <- nail_condes(phobia_work,
                          num.var = 1,
                          introduction = intro_phobia,
                          generate = TRUE)

# Print the LLM response.
cat(res_phobia$response)

### Example 4: beard_cont dataset ###

data(beard_cont)

# Fix the random seed before the analysis.
set.seed(1)
# Correspondence analysis; the plot hides the columns and shows the rows.
res_ca_beard <- FactoMineR::CA(beard_cont, graph = FALSE)
plot.CA(res_ca_beard, invisible = 'col')

# First-dimension coordinates of the rows + original data.
beard_work <- res_ca_beard$row$coord |> as.data.frame()
beard_work <- beard_work[,1] |> cbind(beard_cont)

# Multi-line introduction, collapsed to a single line below.
intro_beard <- "These data refer to 8 types of beards.
Each beard was evaluated by 62 assessors."
intro_beard <- gsub('\n', ' ', intro_beard) |>
stringr::str_squish()

# Custom request, written and collapsed the same way as the introduction.
req_beard <- "Please explain what differentiates beards
on both sides of the scale.
Then, give the scale a name."
req_beard <- gsub('\n', ' ', req_beard) |>
stringr::str_squish()

# 'generate' is not supplied here, so its default (FALSE) applies and only
# the prompt is returned: this lets the prompt be edited before it is sent.
res_beard <- nail_condes(beard_work,
                         num.var = 1,
                         quanti.threshold = 0.5,
                         quanti.cat = c('Very often used', 'Never used', 'Sometimes used'),
                         introduction = intro_beard,
                         request = req_beard)

# Inspect the returned prompt.
res_beard

# Adapt the wording of the prompt to the data: every occurrence of
# 'observations' is replaced with 'beards'.
ppt <- stringr::str_replace_all(res_beard, 'observations', 'beards')
cat(ppt)

# Send the edited prompt to the model directly and get the answer as text.
# Note that res_beard is overwritten: it now holds the response, not the prompt.
res_beard <- ollamar::generate(model = 'llama3', prompt = ppt, output = 'text')

cat(res_beard)
}

Run the code above in your browser using DataLab