A quick reference for common dsprrr operations.
Setup & Configuration
library(dsprrr)
#>
#> Attaching package: 'dsprrr'
#> The following object is masked from 'package:methods':
#>
#> signature
library(ellmer)
# Auto-detect from environment variables (OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.)
dsp_configure()
# Explicitly set provider and model
dsp_configure(provider = "openai", model = "gpt-4o-mini")
dsp_configure(provider = "anthropic", model = "claude-3-5-sonnet-latest")
# With additional parameters
dsp_configure(provider = "openai", model = "gpt-4o", temperature = 0.7)
Check Configuration Status
dsprrr_sitrep()
#> dsprrr Configuration
#> ────────────────────────────────────────
#> Default Chat: ✓ Active
#> Provider: OpenAI
#> Model: gpt-4o-mini
Scoped LLM Override
# Temporarily use a different LLM for a block
claude <- chat_claude()
with_lm(claude, {
dsp("question -> answer", question = "What is 2+2?")
dsp("text -> summary", text = "Long article...")
})
# Function-scoped LLM (auto-cleans up on exit)
my_analysis <- function(data) {
local_lm(chat_claude())
dsp("data -> summary", data = data)
}
Response Caching
# Caching is enabled by default (memory + disk)
# Configure cache behavior
configure_cache(
enable_disk = TRUE,
disk_path = ".dsprrr_cache",
memory_max_entries = 1000L
)
# View cache performance
cache_stats()
#> Hit rate: 75%
#> Memory entries: 42
# Clear caches
clear_cache("all") # Both memory and disk
clear_cache("memory") # Memory only
# Disable caching globally via environment variable
# DSPRRR_CACHE_ENABLED=false
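To toggle caching from inside an R session (for example in tests), set the same variable with base R. This sketch assumes dsprrr checks DSPRRR_CACHE_ENABLED the next time the cache is consulted:
# Disable caching for the current session
Sys.setenv(DSPRRR_CACHE_ENABLED = "false")
# Re-enable it
Sys.setenv(DSPRRR_CACHE_ENABLED = "true")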
Tip: Add .dsprrr_cache/ to your .gitignore when using disk caching.
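If you use usethis, one optional convenience for adding that entry:
# Appends .dsprrr_cache/ to the project's .gitignore (requires usethis)
usethis::use_git_ignore(".dsprrr_cache/")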
Signature Notation
Signatures define what goes in and what comes out.
| Pattern | Meaning |
|---|---|
| question | Single string input |
| context, question | Multiple inputs |
| items: list[string] | Typed list input |
Constrained Types
| Pattern | Meaning |
|---|---|
| sentiment: enum('pos', 'neg', 'neutral') | Fixed choices |
| label: Literal['a', 'b', 'c'] | Python-style enum |
| summary: string[50, 200] | String with length hints |
Collection Types
| Pattern | Meaning |
|---|---|
| tags: list[string] | List of strings |
| items: array(string) | Array notation |
| words: string[] | Bracket notation |
| data: dict[string, int] | Dictionary/map |
Optional Types
| Pattern | Meaning |
|---|---|
| note: Optional[string] | May be null |
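These annotations can be mixed in a single signature string. A sketch using the notation above with the signature() constructor shown later in this reference; the field names are illustrative:
# Illustrative combination of the type annotations above
sig <- signature(
  "document, question -> answer: string, tags: list[string], note: Optional[string]"
)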
Multiple Outputs
# Multiple output fields
sig <- signature("question -> answer, confidence: float")
# With different types
sig <- signature("text -> sentiment: enum('pos', 'neg'), score: float")
With Instructions
sig <- signature(
"context, question -> answer",
instructions = "Answer based only on the provided context. Be concise."
)
Creating Modules
Quick: dsp() for One-Off Calls
# With explicit Chat
chat <- chat_openai(model = "gpt-5-mini")
chat |> dsp("question -> answer", question = "What is 2+2?")
#> [1] "4"
The auto-detected form (when you have a default Chat configured):
# With auto-detected Chat (uses configured default)
dsp("question -> answer", question = "What is 2+2?")
Reusable: as_module() for Repeated Use
# Create from Chat
classifier <- chat_openai(model = "gpt-5-mini") |>
as_module("text -> sentiment: enum('positive', 'negative', 'neutral')")
# Use repeatedly
classifier$predict(text = "Love it!")
#> $sentiment
#> [1] "positive"
classifier$predict(text = "Hate it!")
#> $sentiment
#> [1] "negative"
Full Control: signature() + module()
# For optimization and complex configurations
sig <- signature("context, question -> answer")
mod <- module(sig, type = "predict")
# With custom template
mod <- module(
sig,
type = "predict",
template = "Context:\n{context}\n\nQuestion: {question}"
)
Module Types Decision Tree
What do you need?
│
├─ Simple text in/out → type = "predict" (PredictModule)
│    └─ Q&A, classification, summarization, extraction
│
└─ Tool use / multi-step reasoning → type = "react" (ReactModule)
     └─ Agents, search, calculations, API calls
| type | Class | Use for |
|---|---|---|
| "predict" | PredictModule | Standard text generation |
| "react" | ReactModule | Tool-calling agents |
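A minimal sketch of the react type using only the constructors shown in this reference; how the agent's tools are registered is not covered here and is omitted:
# Tool-calling agent skeleton (tool registration omitted)
sig <- signature("question -> answer")
agent <- module(sig, type = "react")
result <- run(agent, question = "What is 17 * 23?", .llm = chat_openai())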
Running Modules
Single Execution
# Using run()
result <- run(mod, question = "What is R?", .llm = chat_openai())
# Using predict method
result <- mod$predict(question = "What is R?")
Batch Processing
# Vector inputs
results <- mod$predict(text = c("Great!", "Awful!", "Meh"))
# Data frame
new_data <- data.frame(text = c("A", "B", "C"))
results <- predict(mod, new_data = new_data)
# run_dataset for full control
results <- run_dataset(mod, dataset, .llm = llm)
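A usage sketch for run_dataset(), assuming the dataset is a data frame whose column names match the signature's input fields (the same convention predict() uses above):
# Hypothetical dataset; column names match the module's input fields
dataset <- data.frame(question = c("What is R?", "What is CRAN?"))
results <- run_dataset(mod, dataset, .llm = chat_openai())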
Show Prompt
# See the prompt being sent
result <- run(mod, question = "Test", .llm = llm, .show_prompt = TRUE)
Metrics & Evaluation
Built-in Metrics
| Metric | Measures |
|---|---|
| metric_exact_match() | Exact string equality |
| metric_f1() | Token overlap similarity |
| metric_contains() | Output contains expected substring |
| metric_field_match(fields) | Match specific output fields |
| metric_threshold(metric, 0.8) | Apply threshold to any metric |
| metric_custom(fn) | Custom scoring function |
Metric Selection Guide
What are you measuring?
│
├─ Exact correctness → metric_exact_match()
│    └─ Facts, names, simple answers
│
├─ Approximate match → metric_f1() or metric_threshold(metric_f1(), 0.8)
│    └─ Paraphrased answers, spelling tolerance
│
├─ Contains key info → metric_contains()
│    └─ Important terms must appear
│
├─ Specific fields → metric_field_match(c("answer", "score"))
│    └─ Multi-field outputs
│
└─ Custom logic → metric_custom(my_scorer_fn)
     └─ Domain-specific evaluation
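The constructors named in the guide above are ordinary R calls, so a metric can be built once and reused wherever a metric is expected:
# Build metrics up front and pass them to evaluation or optimization
lenient   <- metric_threshold(metric_f1(), 0.8)
by_fields <- metric_field_match(c("answer", "score"))
custom    <- metric_custom(my_scorer_fn)  # my_scorer_fn is your own function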
Metric by Task Type
| Task | Metric | Notes |
|---|---|---|
| Classification | metric_exact_match() | Answer must be exactly right |
| Extraction | metric_exact_match(field = "...") | Extract specific field |
| Generation | metric_f1() | Partial credit for overlap |
| Yes/No questions | metric_exact_match(ignore_case = TRUE) | "Yes" = "yes" |
| Contains keyword | metric_contains() | Answer includes key info |
| Complex evaluation | Custom function | Domain-specific logic |
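A sketch of a custom scorer for metric_custom(). The argument names below (prediction, expected) are an assumption; check the metric_custom() documentation for the exact interface:
# Hypothetical scorer: 1 if the expected string appears in the prediction
my_scorer_fn <- function(prediction, expected) {
  as.numeric(grepl(tolower(expected), tolower(prediction), fixed = TRUE))
}
my_metric <- metric_custom(my_scorer_fn)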
Optimization
Grid Search
# Search over parameters
mod$optimize_grid(
devset = train_data,
metric = metric_exact_match(),
.llm = llm,
parameters = list(
prompt_style = c("concise", "detailed"),
temperature = c(0.0, 0.3, 0.7)
)
)
# Check results
module_trials(mod)
module_metrics(mod)
Debugging & Inspection
Inspect Last Call
# Get the last prompt sent
get_last_prompt()
#> ── Last Prompt ──
#> Model: gpt-4o-mini via OpenAI
#> Input tokens: 45
#>
#> Question: What is 2+2?
Module Inspection
# Detailed module state
mod$inspect()
# Last trace from dsp()
get_last_trace()
Production
Save/Restore Configuration
Vitals Integration
# Convert module to vitals solver
solver <- as_vitals_solver(mod)
# Use with vitals::eval_task()
library(vitals)
result <- eval_task(my_task, solver)
# Convert vitals scorer to dsprrr metric
my_metric <- as_dsprrr_metric(vitals_scorer)
Common Patterns
RAG (Retrieval-Augmented Generation)
sig <- signature(
"context, question -> answer",
instructions = "Answer based only on the provided context."
)
mod <- module(sig, type = "predict")
# Use with retrieved context
result <- run(mod,
context = retrieved_docs,
question = user_query,
.llm = llm
)
Classification
classifier <- chat_openai() |>
as_module("text -> label: enum('spam', 'not_spam')")
labels <- classifier$predict(text = emails)
Extraction
extractor <- chat_openai() |>
as_module("text -> entities: list[string], summary: string")
result <- extractor$predict(text = document)
result$entities
result$summary
Chain of Thought (via instructions)
sig <- signature(
"problem -> answer",
instructions = "Think step by step. Show your reasoning before the final answer."
)
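Wiring this signature into a module and running it uses the same calls as the patterns above:
mod <- module(sig, type = "predict")
result <- run(mod,
  problem = "A train travels 60 km in 45 minutes. What is its average speed in km/h?",
  .llm = chat_openai()
)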