Merged · 57 commits
c91ebec
fix the broken link (.Rmd not .html) in the README for structured out…
jpcompartir Dec 3, 2025
5fc5249
Start building Anthropic integration
jpcompartir Dec 3, 2025
3f30251
add ant schema conversion - make sure types are as expected for API
jpcompartir Dec 3, 2025
f99c5d2
add schema section to ant_build_messages_request
jpcompartir Dec 3, 2025
66e7782
add roxygen2 docs for ant_build_messages_request
jpcompartir Dec 3, 2025
eff7ca2
add sonnet-4-5 instruction to docs for structures outputs (should add…
jpcompartir Dec 3, 2025
a9ab70f
update chunks and df examples for oai module
jpcompartir Dec 3, 2025
4d6559c
stage dev_docs/ant_messages.qmd
jpcompartir Dec 3, 2025
5da258d
start refactoring for the proper propagation of error messages
jpcompartir Dec 3, 2025
deb6607
fix the process_response function to start dealing with errors
jpcompartir Dec 3, 2025
d75d406
switch to .error_msg everywhere in package
jpcompartir Dec 3, 2025
76d3fdb
finish .error_msg changes
jpcompartir Dec 3, 2025
4e330db
add tests for .extract_api_error with mocks
jpcompartir Dec 3, 2025
cee7350
add endpointr_id to hf_build_request in prep for hf_embed_chunks
jpcompartir Oct 8, 2025
3036f6c
add the hf_embed_chunks function
jpcompartir Oct 9, 2025
983ff01
update hf_embed_df to use the hf_embed_chunks function for efficiency
jpcompartir Oct 9, 2025
e6f3571
export hf_embed_chunks
jpcompartir Oct 10, 2025
797a42c
fix merge conflicts in test-hf_embed.R
jpcompartir Dec 4, 2025
b008a59
Fix part of out-of-date docs and examples
jpcompartir Nov 6, 2025
6eabc13
add the .handle_output_dir to start replacing .handle_output_file
jpcompartir Nov 6, 2025
37559e8
bump version and add arrow to deps
jpcompartir Nov 18, 2025
340aa1f
add metadata.json to the output_dir - will be useful in the future fo…
jpcompartir Nov 18, 2025
0179d55
add final alert for hf_embed_df function reporting number of successe…
jpcompartir Nov 18, 2025
4db7190
start the hf_classify_chunks func
jpcompartir Nov 18, 2025
0236ef5
add input val to hf_classify_chunks
jpcompartir Nov 18, 2025
b2e7c35
metadata and alerts etc. for hf_classify_chunks
jpcompartir Nov 18, 2025
33d3ceb
fix typos in hf_classify_chunks
jpcompartir Nov 18, 2025
a0e8cc8
add texts to failures and successes for the hf_classify_chunks function
jpcompartir Nov 19, 2025
8edc8cd
add the hf_classify_dev (with encrypted endpoints)
jpcompartir Nov 19, 2025
f23851f
add the hf_get_model_max_length function for ammendments to hf_classi…
jpcompartir Nov 19, 2025
6d98466
write inference parameters to metadata in hf_embed_df
jpcompartir Nov 19, 2025
7ed5fba
add max_length parameter to hf_classify_df and write inference parame…
jpcompartir Nov 19, 2025
512bc24
remove ... option for args passing in hf_classify_chunks/df
jpcompartir Nov 19, 2025
8a56718
Remove max_length from hf_embed_df, and hf_classify_df - the solution…
jpcompartir Nov 20, 2025
628233b
add the 'hf_get_endpoint_info()` function to retrieve endpoint details
jpcompartir Nov 20, 2025
ee04328
add @returns for hf_get_endpoint_info
jpcompartir Nov 20, 2025
4f110f1
Update test_embed tests following changes to file writing and arguments
jpcompartir Nov 20, 2025
09c4769
Update README following changes to hf_*_df functions and move to chunks
jpcompartir Nov 20, 2025
4a8beca
add chunking and updated tests for hf_classify_*, similar to hf_embed_*
jpcompartir Nov 20, 2025
b55148f
add new functions to _pkgdown.yml and new section for HF utilities
jpcompartir Nov 20, 2025
aa402f8
update the hugging_face_inference vignette in line with recent change…
jpcompartir Nov 20, 2025
e5f3640
update roxygen2 docs for classify
jpcompartir Nov 20, 2025
a79bba8
Re-factor such that chunks doesn't overwrite variable names - writes …
jpcompartir Nov 20, 2025
6690822
rephrase to chunking vocab
jpcompartir Dec 3, 2025
6112078
refactor openai_embed.R for chunks
jpcompartir Dec 4, 2025
3013cd0
Move oai_embed_df to chunks func
jpcompartir Dec 3, 2025
7d62a27
fix tests affected by output_file -> output_dir changes
jpcompartir Dec 3, 2025
38e92c9
continue rebase
jpcompartir Dec 4, 2025
4cd3e86
After refactoring clear that we actually want to return the .status a…
jpcompartir Dec 4, 2025
8bef2c6
Continue refactor with altering tests for .status expectation
jpcompartir Dec 4, 2025
233b4f4
Resolve any merge conflicts for .status change and refactors
jpcompartir Dec 4, 2025
09e08b7
update .create_error_tibble.Rd
jpcompartir Dec 4, 2025
7cdc9f0
Update news ahead of merge in feature branch and eventual version push
jpcompartir Dec 4, 2025
c9e1db3
prevent httr2 from capturing error messages, as we've done in the ref…
jpcompartir Dec 4, 2025
446d154
move funcs to .extract_api_error instead of manual plucking
jpcompartir Dec 4, 2025
0e32d87
Bring together some integration tests for when making big changes
jpcompartir Dec 4, 2025
21d9d83
switch to how we identify successes/failures - not using the httr2 fu…
jpcompartir Dec 4, 2025
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: EndpointR
Title: Connects to various Machine Learning inference providers
Version: 0.1.2
Version: 0.2
Authors@R:
person("Jack", "Penzer", , "Jack.penzer@sharecreative.com", role = c("aut", "cre"))
Description: EndpointR is a 'batteries included', open-source R package for connecting to various APIs for Machine Learning model predictions. EndpointR is built for company-specific use cases, so may not be useful to a wide audience.
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(ant_build_messages_request)
export(create_json_schema)
export(get_api_key)
export(hf_build_request)
@@ -25,6 +26,7 @@ export(oai_complete_chunks)
export(oai_complete_df)
export(oai_complete_text)
export(oai_embed_batch)
export(oai_embed_chunks)
export(oai_embed_df)
export(oai_embed_text)
export(perform_requests_with_strategy)
10 changes: 9 additions & 1 deletion NEWS.md
@@ -1,3 +1,10 @@
# EndpointR 0.2

- **Error and status propagation**: results now include `.error`, `.error_msg` (standardised across the package), and `.status` columns. The main change prevents httr2 from swallowing error messages before we can handle them
- **Parquet writing**: adds parquet writing to `oai_complete_df()` and `oai_embed_df()`
- **Chunked processing**: adds a chunks function to `oai_embed` and rewrites all batch -> chunk logic
- **Anthropic support**: implements the Anthropic Messages API with structured outputs (via beta header)

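The standardised result columns described above can be sketched as follows. This is an illustrative shape only: the column names come from the notes above, while the rows, values, and `endpointr_id` contents are made up for the example.

```r
# sketch of the standardised result columns (.status, .error, .error_msg);
# rows and values are illustrative, not real API output
results <- data.frame(
  endpointr_id = c("doc-1", "doc-2"),
  .status      = c(200L, 429L),   # raw HTTP status, no longer swallowed by httr2
  .error       = c(FALSE, TRUE),
  .error_msg   = c(NA_character_, "rate_limit_error: Too many requests")
)

# failures can now be filtered on .error and inspected via .error_msg
failures <- results[results$.error, c("endpointr_id", ".status", ".error_msg")]
nrow(failures)  # 1
```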
# EndpointR 0.1.2

- **File writing improvements**: `hf_embed_df()` and `hf_classify_df()` now write intermediate results as `.parquet` files to `output_dir` directories, similar to improvements in 0.1.1 for OpenAI functions
@@ -9,6 +16,7 @@
- **Dependency update**: Package now depends on `arrow` for faster `.parquet` file writing and reading

- **Metadata tracking**: Hugging Face functions that write to files (`hf_embed_df()`, `hf_classify_df()`, `hf_embed_chunks()`, `hf_classify_chunks()`) now write `metadata.json` to output directories containing:

- Endpoint URL and API key name used
- Processing parameters (chunk_size, concurrent_requests, timeout, max_retries)
- Inference parameters (truncate, max_length)
@@ -18,6 +26,7 @@
- **max_length parameter**: Added `max_length` parameter to `hf_classify_df()` and `hf_classify_chunks()` for text truncation control. Note: `hf_embed_df()` handles truncation automatically via endpoint configuration (set `AUTO_TRUNCATE` in endpoint settings)

- **New utility functions**:

- `hf_get_model_max_length()` - Retrieve maximum token length for a Hugging Face model
- `hf_get_endpoint_info()` - Retrieve detailed information about a Hugging Face Inference Endpoint

@@ -36,4 +45,3 @@ Initial BETA release, ships with:
- Support for text completion using OpenAI models via the Chat Completions API
- Support for embeddings with the OpenAI Embeddings API
- Structured outputs via JSON schemas and validators

175 changes: 175 additions & 0 deletions R/anthropic_messages.R
@@ -0,0 +1,175 @@
# constants ----
.ANT_API_VERSION <- "2023-06-01"
.ANT_STRUCTURED_OUTPUTS_BETA <- "structured-outputs-2025-11-13"
.ANT_MESSAGES_ENDPOINT <- "https://api.anthropic.com/v1/messages"
.ANT_DEFAULT_MODEL <- "claude-haiku-4-5"

#' Build an Anthropic Messages API request
#'
#' @description
#' Constructs an httr2 request object for Anthropic's Messages API.
#' Handles message formatting, system prompts, and optional JSON schema
#' for structured outputs. When using structured outputs you must select a supported model.
#'
#'
#' @details
#' This function creates the HTTP request but does not execute it. For
#' structured outputs, you must use a supported model (Claude Sonnet 4.5
#' or Opus 4.1) and the request will automatically include the required
#' beta header.
#'
#' The `schema` parameter accepts either:
#' - A `json_schema` S7 object created with `create_json_schema()`
#' - A raw list in Anthropic's `output_format` structure
#'
#' Unlike OpenAI, Anthropic uses `output_format` (not `response_format`)
#' and the schema structure differs slightly.
#'
#' @param input Text input to send to the model
#' @param endpointr_id An id that will persist through to response
#' @param model Anthropic model to use (default: "claude-haiku-4-5")
#' @param temperature Sampling temperature (0-1), higher values = more randomness
#' @param max_tokens Maximum tokens in response
#' @param schema Optional JSON schema for structured output (json_schema object or list)
#' @param system_prompt Optional system prompt
#' @param key_name Environment variable name for API key
#' @param endpoint_url Anthropic API endpoint URL
#' @param timeout Request timeout in seconds
#' @param max_retries Maximum number of retry attempts for failed requests
#'
#' @return An httr2 request object
#' @export
#'
#' @seealso \url{https://platform.claude.com/docs/en/build-with-claude/structured-outputs}
#' @examples
#' \dontrun{
#' # simple request
#' req <- ant_build_messages_request(
#' input = "What is the capital of France?",
#' max_tokens = 100
#' )
#'
#' # with structured output
#' schema <- create_json_schema(
#' name = "capital_response",
#' schema = schema_object(
#' country = schema_string(),
#' capital = schema_string(),
#' required = c("country", "capital")
#' )
#' )
#' req <- ant_build_messages_request(
#' input = "What is the capital of France?",
#' schema = schema,
#' max_tokens = 100,
#' model = "claude-sonnet-4-5"
#' )
#' }
ant_build_messages_request <- function(
input,
endpointr_id = NULL,
model = .ANT_DEFAULT_MODEL,
temperature = 0,
max_tokens = 500L,
schema = NULL,
system_prompt = NULL,
key_name = "ANTHROPIC_API_KEY",
endpoint_url = .ANT_MESSAGES_ENDPOINT,
timeout = 30L,
max_retries = 5L
) {
# can't use `base_request()` from core.R because Anthropic uses different auth (x-api-key), so we add it as a header

stopifnot(
"input must be a non-empty character string" = is.character(input) && length(input) == 1 && nchar(input) > 0,
"model must be a character string" = is.character(model) && length(model) == 1,
"temperature must be numeric between 0 and 1" = is.numeric(temperature) && temperature >= 0 && temperature <= 1, # diff to OAI API
"max_tokens must be a positive integer" = is.numeric(max_tokens) && max_tokens > 0)

use_structured_outputs <- FALSE # flag for later control flow

api_key <- get_api_key(key_name)

messages <- list(
list(role = "user", content = input)
)

body <- list(
model = model,
messages = messages,
max_tokens = as.integer(max_tokens),
temperature = temperature
)

# Anthropic API takes system_prompt as its own parameter, different to OAI where we concatenate

if(!is.null(system_prompt)){
if (!rlang::is_scalar_character(system_prompt)){
cli::cli_abort("{.arg system_prompt} must be a {.cls character} of length 1, e.g. 'This is a valid system prompt'")
}

body$system <- system_prompt
}

# structured outputs: accept a json_schema S7 object or a raw output_format list
if(!is.null(schema)) {
use_structured_outputs <- TRUE
if (inherits(schema, "EndpointR::json_schema")) {
body$output_format <- .ant_format_schema(schema)
} else if (is.list(schema)) {
cli::cli_alert_warning("Your {.arg schema} is a list, not an EndpointR json_schema")
body$output_format <- schema
} else {
cli::cli_abort("{.arg schema} must be an EndpointR json_schema object or a list")
}
}

# build the request with headers, auth, timeout, retries, backoff (incl. system prompt if applicable)
request <- httr2::request(endpoint_url) |>
httr2::req_user_agent("EndpointR") |>
httr2::req_method("POST") |>
httr2::req_headers(
"Content-Type" = "application/json",
"x-api-key" = api_key,
"anthropic-version" = .ANT_API_VERSION
) |>
httr2::req_error(is_error = ~ FALSE) |> # don't let httr2 auto-throw errors; we handle them ourselves
httr2::req_timeout(timeout) |>
httr2::req_retry(
max_tries = max_retries,
backoff = ~ 2 ^ .x,
retry_on_failure = TRUE
) |>
httr2::req_body_json(body)

# if we did use structured outputs then we need to add the anthropic-beta header (this will be patched at some point I expect)

if (use_structured_outputs) {
request <- httr2::req_headers(request, "anthropic-beta" = .ANT_STRUCTURED_OUTPUTS_BETA)
}

if (!is.null(endpointr_id)) {
request <- httr2::req_headers(request, endpointr_id = endpointr_id)
}

return(request)
}



#' Convert json_schema S7 object to Anthropic output_format structure
#' @keywords internal
.ant_format_schema <- function(schema) {
if (!inherits(schema, "EndpointR::json_schema")) {
cli::cli_abort("schema must be a json_schema object")
}

# Anthropic uses output_format with type "json_schema"
# The schema goes directly in the "schema" field (not nested like OpenAI)
list(
type = "json_schema",
schema = schema@schema
)
}
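The `output_format` structure that `.ant_format_schema()` builds can be illustrated with a plain-list sketch. The inner list below stands in for a `json_schema` S7 object's `@schema` slot and is an assumption for illustration, not real package output.

```r
# plain-list stand-in for a json_schema object's @schema slot (illustrative)
inner_schema <- list(
  type = "object",
  properties = list(
    country = list(type = "string"),
    capital = list(type = "string")
  ),
  required = list("country", "capital")
)

# Anthropic's output_format: the schema sits directly in `schema`,
# not nested under a named wrapper as in OpenAI's response_format
output_format <- list(
  type   = "json_schema",
  schema = inner_schema
)

output_format$type  # "json_schema"
```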
