# Quality assurance -----------------------------
# Runs only when an Anthropic API key is present in the environment.
# NOTE(review): Task, generate() and model_graded_qa() are not attached by
# any library() call visible here -- presumably they come from the vitals
# package (cf. VITALS_LOG_DIR); confirm it is loaded before running.
if (nzchar(Sys.getenv("ANTHROPIC_API_KEY"))) {
  # Point the log directory (VITALS_LOG_DIR) at a temporary directory.
  withr::local_envvar(VITALS_LOG_DIR = withr::local_tempdir())

  library(ellmer)
  library(tibble)

  # Two arithmetic prompts paired with their expected answers.
  simple_addition <- tibble(
    input = c("What's 2+2?", "What's 2+3?"),
    target = c("4", "5")
  )

  # Task built from the dataset above, a generate() solver wrapping an
  # Anthropic chat, and the model_graded_qa() scorer.
  tsk <- Task$new(
    dataset = simple_addition,
    solver = generate(
      solver_chat = chat_anthropic(model = "claude-3-7-sonnet-latest")
    ),
    scorer = model_graded_qa()
  )

  tsk$eval()
}
# Factual response -------------------------------
# Runs only when an Anthropic API key is present in the environment.
# NOTE(review): Task, generate() and model_graded_fact() are not attached by
# any library() call visible here -- presumably they come from the vitals
# package (cf. VITALS_LOG_DIR); confirm it is loaded before running.
if (nzchar(Sys.getenv("ANTHROPIC_API_KEY"))) {
  # Point the log directory (VITALS_LOG_DIR) at a temporary directory.
  withr::local_envvar(VITALS_LOG_DIR = withr::local_tempdir())

  library(ellmer)
  library(tibble)

  # Two questions about the history of R, paired with reference answers.
  r_history <- tibble(
    input = c(
      "Who created the R programming language?",
      "In what year was version 1.0 of R released?"
    ),
    target = c("Ross Ihaka and Robert Gentleman.", "2000.")
  )

  # Task built from the dataset above, a generate() solver wrapping an
  # Anthropic chat, and the model_graded_fact() scorer.
  tsk <- Task$new(
    dataset = r_history,
    solver = generate(
      solver_chat = chat_anthropic(model = "claude-3-7-sonnet-latest")
    ),
    scorer = model_graded_fact()
  )

  tsk$eval()
}
# Run the code above in your browser using DataLab