Skip to contents

These functions wrap common vitals scorers for direct use as dsprrr metrics, eliminating the need to manually call as_dsprrr_metric().

Usage

metric_model_graded_qa(
  template = NULL,
  instructions = NULL,
  grade_pattern = "(?i)GRADE\\s*:\\s*([CPI])(.*)$",
  partial_credit = FALSE,
  scorer_chat = NULL,
  input_column = "input",
  target_column = "target",
  result_column = "result"
)

metric_model_graded_fact(
  template = NULL,
  instructions = NULL,
  grade_pattern = "(?i)GRADE\\s*:\\s*([CPI])(.*)$",
  partial_credit = FALSE,
  scorer_chat = NULL,
  input_column = "input",
  target_column = "target",
  result_column = "result"
)

metric_detect_match(
  location = c("end", "begin", "any", "exact"),
  case_sensitive = FALSE,
  input_column = "input",
  target_column = "target",
  result_column = "result"
)

metric_detect_includes(
  case_sensitive = FALSE,
  input_column = "input",
  target_column = "target",
  result_column = "result"
)

metric_detect_pattern(
  pattern,
  case_sensitive = FALSE,
  all = FALSE,
  input_column = "input",
  target_column = "target",
  result_column = "result"
)

Arguments

template

Grading template: a glue string that accepts the substitutions input, answer, criterion, and instructions.

instructions

Grading instructions

grade_pattern

Regex pattern to extract grade from judge response

partial_credit

Whether to allow partial credit

scorer_chat

An ellmer chat for grading (e.g., ellmer::chat_openai())

input_column

Column name for input in vitals sample

target_column

Column name for target in vitals sample

result_column

Column name for result in vitals sample

location

Where to look for the target in the result: "end" (the default), "begin", "any", or "exact"

case_sensitive

Whether matching is case-sensitive

pattern

Regex pattern with capture groups. The captured groups are extracted from the result and checked against the target. Use parentheses to define capture groups, e.g., "([0-9]+)" to extract numbers.

all

Whether all captured groups must match the target (TRUE) or whether at least one matching group is enough (FALSE, the default).

Value

A metric function with signature function(prediction, expected_row)

Examples

if (FALSE) { # \dontrun{
# Model-graded QA metric
metric <- metric_model_graded_qa(scorer_chat = ellmer::chat_openai())
score <- metric("Paris", data.frame(target = "Paris"))

# Model-graded fact metric with partial credit, graded by a Claude chat
metric <- metric_model_graded_fact(
  scorer_chat = ellmer::chat_claude(),
  partial_credit = TRUE
)
} # }
if (FALSE) { # \dontrun{
# String detection metrics
metric <- metric_detect_match(location = "end")
metric("The answer is Paris", data.frame(target = "Paris"))  # 1

metric <- metric_detect_includes()
metric("Paris is the capital", data.frame(target = "Paris"))  # 1
} # }