A quick reference for common dsprrr operations.
Setup & Configuration
library(dsprrr)
#>
#> Attaching package: 'dsprrr'
#> The following object is masked from 'package:methods':
#>
#> signature
library(ellmer)
# Auto-detect from environment variables (OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.)
dsp_configure()
# Explicitly set provider and model
dsp_configure(provider = "openai", model = "gpt-4o-mini")
dsp_configure(provider = "anthropic", model = "claude-3-5-sonnet-latest")
# With additional parameters
dsp_configure(provider = "openai", model = "gpt-4o", temperature = 0.7)
Check Configuration Status
dsprrr_sitrep()
#> dsprrr Configuration
#> ────────────────────────────────────────
#> Default Chat: ✓ Active
#> Provider: OpenAI
#> Model: gpt-4o-mini
Scoped LLM Override
# Temporarily use a different LLM for a block
claude <- chat_claude()
with_lm(claude, {
dsp("question -> answer", question = "What is 2+2?")
dsp("text -> summary", text = "Long article...")
})
# Function-scoped LLM (auto-cleans up on exit)
my_analysis <- function(data) {
local_lm(chat_claude())
dsp("data -> summary", data = data)
}
Response Caching
# Caching is enabled by default (memory + disk)
# Configure cache behavior
configure_cache(
enable_disk = TRUE,
disk_path = ".dsprrr_cache",
memory_max_entries = 1000L
)
# View cache performance
cache_stats()
#> Hit rate: 75%
#> Memory entries: 42
# Clear caches
clear_cache("all") # Both memory and disk
clear_cache("memory") # Memory only
# Disable caching globally via environment variable
# DSPRRR_CACHE_ENABLED=false
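To toggle caching from inside an R session (for example in tests), set the same variable with base R. This sketch assumes dsprrr checks DSPRRR_CACHE_ENABLED the next time the cache is consulted:
# Disable caching for the current session
Sys.setenv(DSPRRR_CACHE_ENABLED = "false")
# Re-enable it
Sys.setenv(DSPRRR_CACHE_ENABLED = "true")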
Tip: Add .dsprrr_cache/ to your .gitignore when using disk caching.
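If you use usethis, one optional convenience for adding that entry:
# Appends .dsprrr_cache/ to the project's .gitignore (requires usethis)
usethis::use_git_ignore(".dsprrr_cache/")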
Signature Notation
Signatures define what goes in and what comes out.
| Pattern | Meaning |
|---|---|
| question | Single string input |
| context, question | Multiple inputs |
| items: list[string] | Typed list input |
Constrained Types
| Pattern | Meaning |
|---|---|
| sentiment: enum('pos', 'neg', 'neutral') | Fixed choices |
| label: Literal['a', 'b', 'c'] | Python-style enum |
| summary: string[50, 200] | String with length hints |
Collection Types
| Pattern | Meaning |
|---|---|
| tags: list[string] | List of strings |
| items: array(string) | Array notation |
| words: string[] | Bracket notation |
| data: dict[string, int] | Dictionary/map |
Optional Types
| Pattern | Meaning |
|---|---|
| note: Optional[string] | May be null |
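These annotations can be mixed in a single signature string. A sketch using the notation above with the signature() constructor shown later in this reference; the field names are illustrative:
# Illustrative combination of the type annotations above
sig <- signature(
  "document, question -> answer: string, tags: list[string], note: Optional[string]"
)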
Multiple Outputs
# Multiple output fields
sig <- signature("question -> answer, confidence: float")
# With different types
sig <- signature("text -> sentiment: enum('pos', 'neg'), score: float")
With Instructions
sig <- signature(
"context, question -> answer",
instructions = "Answer based only on the provided context. Be concise."
)
Creating Modules
Quick: dsp() for One-Off Calls
# With explicit Chat
chat <- chat_openai(model = "gpt-5-mini")
chat |> dsp("question -> answer", question = "What is 2+2?")
#> [1] "4"
The auto-detected form (when you have a default Chat configured):
# With auto-detected Chat (uses configured default)
dsp("question -> answer", question = "What is 2+2?")
Reusable: as_module() for Repeated Use
# Create from Chat
classifier <- chat_openai(model = "gpt-5-mini") |>
as_module("text -> sentiment: enum('positive', 'negative', 'neutral')")
# Use repeatedly
classifier$predict(text = "Love it!")
#> $sentiment
#> [1] "positive"
classifier$predict(text = "Hate it!")
#> $sentiment
#> [1] "negative"
Full Control: signature() + module()
# For optimization and complex configurations
sig <- signature("context, question -> answer")
mod <- module(sig, type = "predict")
# With custom template
mod <- module(
sig,
type = "predict",
template = "Context:\n{context}\n\nQuestion: {question}"
)
Module Types Decision Tree
What do you need?
│
├─ Simple text in/out → type = "predict" (PredictModule)
│    └─ Q&A, classification, summarization, extraction
│
└─ Tool use / multi-step reasoning → type = "react" (ReactModule)
     └─ Agents, search, calculations, API calls
| type | Class | Use for |
|---|---|---|
| "predict" | PredictModule | Standard text generation |
| "react" | ReactModule | Tool-calling agents |
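A minimal sketch of the react type using only the constructors shown in this reference; how the agent's tools are registered is not covered here and is omitted:
# Tool-calling agent skeleton (tool registration omitted)
sig <- signature("question -> answer")
agent <- module(sig, type = "react")
result <- run(agent, question = "What is 17 * 23?", .llm = chat_openai())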
Running Modules
Single Execution
# Using run()
result <- run(mod, question = "What is R?", .llm = chat_openai())
# Using predict method
result <- mod$predict(question = "What is R?")
Batch Processing
# Vector inputs
results <- mod$predict(text = c("Great!", "Awful!", "Meh"))
# Data frame
new_data <- data.frame(text = c("A", "B", "C"))
results <- predict(mod, new_data = new_data)
# run_dataset for full control
results <- run_dataset(mod, dataset, .llm = llm)
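A usage sketch for run_dataset(), assuming the dataset is a data frame whose column names match the signature's input fields (the same convention predict() uses above):
# Hypothetical dataset; column names match the module's input fields
dataset <- data.frame(question = c("What is R?", "What is CRAN?"))
results <- run_dataset(mod, dataset, .llm = chat_openai())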
Show Prompt
# See the prompt being sent
result <- run(mod, question = "Test", .llm = llm, .show_prompt = TRUE)
Metrics & Evaluation
Built-in Metrics
| Metric | Measures |
|---|---|
| metric_exact_match() | Exact string equality |
| metric_f1() | Token overlap similarity |
| metric_contains() | Output contains expected substring |
| metric_field_match(fields) | Match specific output fields |
| metric_threshold(metric, 0.8) | Apply threshold to any metric |
| metric_custom(fn) | Custom scoring function |
Metric Selection Guide
What are you measuring?
│
├─ Exact correctness → metric_exact_match()
│    └─ Facts, names, simple answers
│
├─ Approximate match → metric_f1() or metric_threshold(metric_f1(), 0.8)
│    └─ Paraphrased answers, spelling tolerance
│
├─ Contains key info → metric_contains()
│    └─ Important terms must appear
│
├─ Specific fields → metric_field_match(c("answer", "score"))
│    └─ Multi-field outputs
│
└─ Custom logic → metric_custom(my_scorer_fn)
     └─ Domain-specific evaluation
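The constructors named in the guide above are ordinary R calls, so a metric can be built once and reused wherever a metric is expected:
# Build metrics up front and pass them to evaluation or optimization
lenient   <- metric_threshold(metric_f1(), 0.8)
by_fields <- metric_field_match(c("answer", "score"))
custom    <- metric_custom(my_scorer_fn)  # my_scorer_fn is your own function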
Metric by Task Type
| Task | Metric | Notes |
|---|---|---|
| Classification | metric_exact_match() | Answer must be exactly right |
| Extraction | metric_exact_match(field = "...") | Extract specific field |
| Generation | metric_f1() | Partial credit for overlap |
| Yes/No questions | metric_exact_match(ignore_case = TRUE) | "Yes" = "yes" |
| Contains keyword | metric_contains() | Answer includes key info |
| Complex evaluation | Custom function | Domain-specific logic |
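A sketch of a custom scorer for metric_custom(). The argument names below (prediction, expected) are an assumption; check the metric_custom() documentation for the exact interface:
# Hypothetical scorer: 1 if the expected string appears in the prediction
my_scorer_fn <- function(prediction, expected) {
  as.numeric(grepl(tolower(expected), tolower(prediction), fixed = TRUE))
}
my_metric <- metric_custom(my_scorer_fn)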
Optimization
Grid Search
# Search over parameters
mod$optimize_grid(
devset = train_data,
metric = metric_exact_match(),
.llm = llm,
parameters = list(
prompt_style = c("concise", "detailed"),
temperature = c(0.0, 0.3, 0.7)
)
)
# Check results
module_trials(mod)
module_metrics(mod)
Debugging & Inspection
Inspect Last Call
# Get the last prompt sent
get_last_prompt()
#> ── Last Prompt ──
#> Model: gpt-4o-mini via OpenAI
#> Input tokens: 45
#>
#> Question: What is 2+2?
Module Inspection
# Detailed module state
mod$inspect()
# Last trace from dsp()
get_last_trace()
Production
Save/Restore Configuration
Vitals Integration
# Convert module to vitals solver
solver <- as_vitals_solver(mod)
# Use with vitals::eval_task()
library(vitals)
result <- eval_task(my_task, solver)
# Convert vitals scorer to dsprrr metric
my_metric <- as_dsprrr_metric(vitals_scorer)
Common Patterns
RAG (Retrieval-Augmented Generation)
sig <- signature(
"context, question -> answer",
instructions = "Answer based only on the provided context."
)
mod <- module(sig, type = "predict")
# Use with retrieved context
result <- run(mod,
context = retrieved_docs,
question = user_query,
.llm = llm
)
Classification
classifier <- chat_openai() |>
as_module("text -> label: enum('spam', 'not_spam')")
labels <- classifier$predict(text = emails)
Extraction
extractor <- chat_openai() |>
as_module("text -> entities: list[string], summary: string")
result <- extractor$predict(text = document)
result$entities
result$summary
Chain of Thought (via instructions)
sig <- signature(
"problem -> answer",
instructions = "Think step by step. Show your reasoning before the final answer."
)
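Wiring this signature into a module and running it uses the same calls as the patterns above:
mod <- module(sig, type = "predict")
result <- run(mod,
  problem = "A train travels 60 km in 45 minutes. What is its average speed in km/h?",
  .llm = chat_openai()
)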