From 7ef24377f14cb604e44194b2095a69cdd1caed8d Mon Sep 17 00:00:00 2001 From: Terence Pae Date: Mon, 19 Jan 2026 11:04:57 -0800 Subject: [PATCH 1/2] added Open Responses API support --- .../OsaurusCore/Models/OpenResponsesAPI.swift | 837 ++++++++++++++++++ .../Models/RemoteProviderConfiguration.swift | 5 +- .../OsaurusCore/Models/ResponseWriters.swift | 297 +++++++ .../OsaurusCore/Networking/HTTPHandler.swift | 417 +++++++++ .../Services/RemoteProviderManager.swift | 2 +- .../Services/RemoteProviderService.swift | 293 ++++++ Packages/OsaurusCore/Views/ServerView.swift | 14 + README.md | 14 +- docs/DEVELOPER_TOOLS.md | 13 +- docs/FEATURES.md | 37 + docs/OpenAI_API_GUIDE.md | 187 +++- docs/REMOTE_PROVIDERS.md | 27 +- 12 files changed, 2118 insertions(+), 25 deletions(-) create mode 100644 Packages/OsaurusCore/Models/OpenResponsesAPI.swift diff --git a/Packages/OsaurusCore/Models/OpenResponsesAPI.swift b/Packages/OsaurusCore/Models/OpenResponsesAPI.swift new file mode 100644 index 00000000..c5fe8890 --- /dev/null +++ b/Packages/OsaurusCore/Models/OpenResponsesAPI.swift @@ -0,0 +1,837 @@ +// +// OpenResponsesAPI.swift +// osaurus +// +// Open Responses API compatible request/response models. +// Based on the Open Responses specification: https://www.openresponses.org +// + +import Foundation + +// MARK: - Request Models + +/// Open Responses API create request +public struct OpenResponsesRequest: Codable, Sendable { + /// Model identifier + public let model: String + /// Input content - can be a string or array of input items + public let input: OpenResponsesInput + /// Whether to stream the response + public let stream: Bool? + /// Available tools for the model to use + public let tools: [OpenResponsesTool]? + /// Tool choice configuration + public let tool_choice: OpenResponsesToolChoice? + /// Temperature for sampling + public let temperature: Float? + /// Maximum tokens to generate + public let max_output_tokens: Int? + /// Top-p sampling parameter + public let top_p: Float? + /// Instructions/system prompt + public let instructions: String? + /// Previous response ID for multi-turn conversations + public let previous_response_id: String? + /// Optional metadata + public let metadata: [String: String]? +} + +/// Input can be a string or array of input items +public enum OpenResponsesInput: Codable, Sendable { + case text(String) + case items([OpenResponsesInputItem]) + + public init(from decoder: Decoder) throws { + let container = try decoder.singleValueContainer() + if let text = try? container.decode(String.self) { + self = .text(text) + } else if let items = try? 
container.decode([OpenResponsesInputItem].self) { + self = .items(items) + } else { + throw DecodingError.typeMismatch( + OpenResponsesInput.self, + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Expected String or [OpenResponsesInputItem]" + ) + ) + } + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + switch self { + case .text(let text): + try container.encode(text) + case .items(let items): + try container.encode(items) + } + } +} + +/// Input item types +public enum OpenResponsesInputItem: Codable, Sendable { + case message(OpenResponsesMessageItem) + case functionCallOutput(OpenResponsesFunctionCallOutputItem) + + private enum CodingKeys: String, CodingKey { + case type + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + let type = try container.decode(String.self, forKey: .type) + + switch type { + case "message": + self = .message(try OpenResponsesMessageItem(from: decoder)) + case "function_call_output": + self = .functionCallOutput(try OpenResponsesFunctionCallOutputItem(from: decoder)) + default: + throw DecodingError.dataCorruptedError( + forKey: .type, + in: container, + debugDescription: "Unknown input item type: \(type)" + ) + } + } + + public func encode(to encoder: Encoder) throws { + switch self { + case .message(let item): + try item.encode(to: encoder) + case .functionCallOutput(let item): + try item.encode(to: encoder) + } + } +} + +/// Message input item +public struct OpenResponsesMessageItem: Codable, Sendable { + public let type: String + public let role: String + public let content: OpenResponsesMessageContent + + public init(role: String, content: OpenResponsesMessageContent) { + self.type = "message" + self.role = role + self.content = content + } +} + +/// Message content can be string or array of content parts +public enum OpenResponsesMessageContent: Codable, Sendable { + case text(String) + case parts([OpenResponsesContentPart]) + + public init(from decoder: Decoder) throws { + let container = try decoder.singleValueContainer() + if let text = try? container.decode(String.self) { + self = .text(text) + } else if let parts = try? 
container.decode([OpenResponsesContentPart].self) { + self = .parts(parts) + } else { + throw DecodingError.typeMismatch( + OpenResponsesMessageContent.self, + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Expected String or [OpenResponsesContentPart]" + ) + ) + } + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + switch self { + case .text(let text): + try container.encode(text) + case .parts(let parts): + try container.encode(parts) + } + } + + /// Extract plain text from content + public var plainText: String { + switch self { + case .text(let text): + return text + case .parts(let parts): + return parts.compactMap { part in + if case .inputText(let textPart) = part { + return textPart.text + } + return nil + }.joined(separator: "\n") + } + } +} + +/// Content part types +public enum OpenResponsesContentPart: Codable, Sendable { + case inputText(OpenResponsesInputTextPart) + case inputImage(OpenResponsesInputImagePart) + + private enum CodingKeys: String, CodingKey { + case type + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + let type = try container.decode(String.self, forKey: .type) + + switch type { + case "input_text": + self = .inputText(try OpenResponsesInputTextPart(from: decoder)) + case "input_image": + self = .inputImage(try OpenResponsesInputImagePart(from: decoder)) + default: + throw DecodingError.dataCorruptedError( + forKey: .type, + in: container, + debugDescription: "Unknown content part type: \(type)" + ) + } + } + + public func encode(to encoder: Encoder) throws { + switch self { + case .inputText(let part): + try part.encode(to: encoder) + case .inputImage(let part): + try part.encode(to: encoder) + } + } +} + +/// Text content part +public struct OpenResponsesInputTextPart: Codable, Sendable { + public let type: String + public let text: String + + public init(text: String) { + self.type = "input_text" + self.text = text + } +} + +/// Image content part +public struct OpenResponsesInputImagePart: Codable, Sendable { + public let type: String + public let image_url: String? + public let detail: String? + + public init(imageUrl: String, detail: String? = nil) { + self.type = "input_image" + self.image_url = imageUrl + self.detail = detail + } +} + +/// Function call output item (tool result) +public struct OpenResponsesFunctionCallOutputItem: Codable, Sendable { + public let type: String + public let call_id: String + public let output: String + + public init(callId: String, output: String) { + self.type = "function_call_output" + self.call_id = callId + self.output = output + } +} + +// MARK: - Tool Definitions + +/// Tool definition +public struct OpenResponsesTool: Codable, Sendable { + public let type: String + public let name: String + public let description: String? + public let parameters: JSONValue? + + public init(name: String, description: String?, parameters: JSONValue?) { + self.type = "function" + self.name = name + self.description = description + self.parameters = parameters + } +} + +/// Tool choice configuration +public enum OpenResponsesToolChoice: Codable, Sendable { + case auto + case none + case required + case function(name: String) + + private enum CodingKeys: String, CodingKey { + case type, name + } + + public init(from decoder: Decoder) throws { + // Try decoding as string first + if let container = try? decoder.singleValueContainer(), + let str = try? 
container.decode(String.self) + { + switch str { + case "auto": self = .auto + case "none": self = .none + case "required": self = .required + default: self = .auto + } + return + } + + // Try decoding as object + let container = try decoder.container(keyedBy: CodingKeys.self) + let type = try container.decode(String.self, forKey: .type) + switch type { + case "auto": self = .auto + case "none": self = .none + case "required": self = .required + case "function": + let name = try container.decode(String.self, forKey: .name) + self = .function(name: name) + default: + self = .auto + } + } + + public func encode(to encoder: Encoder) throws { + switch self { + case .auto: + var container = encoder.singleValueContainer() + try container.encode("auto") + case .none: + var container = encoder.singleValueContainer() + try container.encode("none") + case .required: + var container = encoder.singleValueContainer() + try container.encode("required") + case .function(let name): + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode("function", forKey: .type) + try container.encode(name, forKey: .name) + } + } +} + +// MARK: - Response Models + +/// Open Responses API response +public struct OpenResponsesResponse: Codable, Sendable { + public let id: String + public let object: String + public let created_at: Int + public let status: OpenResponsesStatus + public let model: String + public let output: [OpenResponsesOutputItem] + public let usage: OpenResponsesUsage? + public let metadata: [String: String]? + + public init( + id: String, + createdAt: Int, + status: OpenResponsesStatus, + model: String, + output: [OpenResponsesOutputItem], + usage: OpenResponsesUsage? + ) { + self.id = id + self.object = "response" + self.created_at = createdAt + self.status = status + self.model = model + self.output = output + self.usage = usage + self.metadata = nil + } +} + +/// Response status +public enum OpenResponsesStatus: String, Codable, Sendable { + case inProgress = "in_progress" + case completed = "completed" + case failed = "failed" + case cancelled = "cancelled" + case incomplete = "incomplete" +} + +/// Output item types +public enum OpenResponsesOutputItem: Codable, Sendable { + case message(OpenResponsesOutputMessage) + case functionCall(OpenResponsesFunctionCall) + + private enum CodingKeys: String, CodingKey { + case type + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + let type = try container.decode(String.self, forKey: .type) + + switch type { + case "message": + self = .message(try OpenResponsesOutputMessage(from: decoder)) + case "function_call": + self = .functionCall(try OpenResponsesFunctionCall(from: decoder)) + default: + throw DecodingError.dataCorruptedError( + forKey: .type, + in: container, + debugDescription: "Unknown output item type: \(type)" + ) + } + } + + public func encode(to encoder: Encoder) throws { + switch self { + case .message(let item): + try item.encode(to: encoder) + case .functionCall(let item): + try item.encode(to: encoder) + } + } +} + +/// Output message item +public struct OpenResponsesOutputMessage: Codable, Sendable { + public let type: String + public let id: String + public let status: OpenResponsesItemStatus + public let role: String + public let content: [OpenResponsesOutputContent] + + public init(id: String, status: OpenResponsesItemStatus, content: [OpenResponsesOutputContent]) { + self.type = "message" + self.id = id + self.status = status + self.role = 
"assistant" + self.content = content + } +} + +/// Item status +public enum OpenResponsesItemStatus: String, Codable, Sendable { + case inProgress = "in_progress" + case completed = "completed" +} + +/// Output content types +public enum OpenResponsesOutputContent: Codable, Sendable { + case outputText(OpenResponsesOutputText) + + private enum CodingKeys: String, CodingKey { + case type + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + let type = try container.decode(String.self, forKey: .type) + + switch type { + case "output_text": + self = .outputText(try OpenResponsesOutputText(from: decoder)) + default: + throw DecodingError.dataCorruptedError( + forKey: .type, + in: container, + debugDescription: "Unknown output content type: \(type)" + ) + } + } + + public func encode(to encoder: Encoder) throws { + switch self { + case .outputText(let content): + try content.encode(to: encoder) + } + } +} + +/// Output text content +public struct OpenResponsesOutputText: Codable, Sendable { + public let type: String + public let text: String + + public init(text: String) { + self.type = "output_text" + self.text = text + } +} + +/// Function call output item +public struct OpenResponsesFunctionCall: Codable, Sendable { + public let type: String + public let id: String + public let status: OpenResponsesItemStatus + public let call_id: String + public let name: String + public let arguments: String + + public init(id: String, status: OpenResponsesItemStatus, callId: String, name: String, arguments: String) { + self.type = "function_call" + self.id = id + self.status = status + self.call_id = callId + self.name = name + self.arguments = arguments + } +} + +/// Token usage information +public struct OpenResponsesUsage: Codable, Sendable { + public let input_tokens: Int + public let output_tokens: Int + public let total_tokens: Int + + public init(inputTokens: Int, outputTokens: Int) { + self.input_tokens = inputTokens + self.output_tokens = outputTokens + self.total_tokens = inputTokens + outputTokens + } +} + +// MARK: - Streaming Event Models + +/// response.created event +public struct ResponseCreatedEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let response: OpenResponsesResponse + + public init(sequenceNumber: Int, response: OpenResponsesResponse) { + self.type = "response.created" + self.sequence_number = sequenceNumber + self.response = response + } +} + +/// response.in_progress event +public struct ResponseInProgressEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let response: OpenResponsesResponse + + public init(sequenceNumber: Int, response: OpenResponsesResponse) { + self.type = "response.in_progress" + self.sequence_number = sequenceNumber + self.response = response + } +} + +/// response.output_item.added event +public struct OutputItemAddedEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let output_index: Int + public let item: OpenResponsesOutputItem + + public init(sequenceNumber: Int, outputIndex: Int, item: OpenResponsesOutputItem) { + self.type = "response.output_item.added" + self.sequence_number = sequenceNumber + self.output_index = outputIndex + self.item = item + } +} + +/// response.content_part.added event +public struct ContentPartAddedEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let item_id: String + public let 
output_index: Int + public let content_index: Int + public let part: OpenResponsesOutputContent + + public init( + sequenceNumber: Int, + itemId: String, + outputIndex: Int, + contentIndex: Int, + part: OpenResponsesOutputContent + ) { + self.type = "response.content_part.added" + self.sequence_number = sequenceNumber + self.item_id = itemId + self.output_index = outputIndex + self.content_index = contentIndex + self.part = part + } +} + +/// response.output_text.delta event +public struct OutputTextDeltaEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let item_id: String + public let output_index: Int + public let content_index: Int + public let delta: String + + public init(sequenceNumber: Int, itemId: String, outputIndex: Int, contentIndex: Int, delta: String) { + self.type = "response.output_text.delta" + self.sequence_number = sequenceNumber + self.item_id = itemId + self.output_index = outputIndex + self.content_index = contentIndex + self.delta = delta + } +} + +/// response.output_text.done event +public struct OutputTextDoneEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let item_id: String + public let output_index: Int + public let content_index: Int + public let text: String + + public init(sequenceNumber: Int, itemId: String, outputIndex: Int, contentIndex: Int, text: String) { + self.type = "response.output_text.done" + self.sequence_number = sequenceNumber + self.item_id = itemId + self.output_index = outputIndex + self.content_index = contentIndex + self.text = text + } +} + +/// response.output_item.done event +public struct OutputItemDoneEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let output_index: Int + public let item: OpenResponsesOutputItem + + public init(sequenceNumber: Int, outputIndex: Int, item: OpenResponsesOutputItem) { + self.type = "response.output_item.done" + self.sequence_number = sequenceNumber + self.output_index = outputIndex + self.item = item + } +} + +/// response.function_call_arguments.delta event +public struct FunctionCallArgumentsDeltaEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let item_id: String + public let output_index: Int + public let call_id: String + public let delta: String + + public init(sequenceNumber: Int, itemId: String, outputIndex: Int, callId: String, delta: String) { + self.type = "response.function_call_arguments.delta" + self.sequence_number = sequenceNumber + self.item_id = itemId + self.output_index = outputIndex + self.call_id = callId + self.delta = delta + } +} + +/// response.function_call_arguments.done event +public struct FunctionCallArgumentsDoneEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let item_id: String + public let output_index: Int + public let call_id: String + public let arguments: String + + public init(sequenceNumber: Int, itemId: String, outputIndex: Int, callId: String, arguments: String) { + self.type = "response.function_call_arguments.done" + self.sequence_number = sequenceNumber + self.item_id = itemId + self.output_index = outputIndex + self.call_id = callId + self.arguments = arguments + } +} + +/// response.completed event +public struct ResponseCompletedEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let response: OpenResponsesResponse + + public init(sequenceNumber: Int, response: 
OpenResponsesResponse) { + self.type = "response.completed" + self.sequence_number = sequenceNumber + self.response = response + } +} + +/// response.failed event +public struct ResponseFailedEvent: Codable, Sendable { + public let type: String + public let sequence_number: Int + public let response: OpenResponsesResponse + public let error: OpenResponsesError? + + public init(sequenceNumber: Int, response: OpenResponsesResponse, error: OpenResponsesError?) { + self.type = "response.failed" + self.sequence_number = sequenceNumber + self.response = response + self.error = error + } +} + +// MARK: - Error Models + +/// Open Responses error +public struct OpenResponsesError: Codable, Sendable { + public let type: String + public let code: String + public let message: String + + public init(code: String, message: String) { + self.type = "error" + self.code = code + self.message = message + } +} + +/// Error response wrapper +public struct OpenResponsesErrorResponse: Codable, Sendable { + public let error: OpenResponsesError + + public init(code: String, message: String) { + self.error = OpenResponsesError(code: code, message: message) + } +} + +// MARK: - Conversion Helpers + +extension OpenResponsesRequest { + /// Convert Open Responses request to internal ChatCompletionRequest + func toChatCompletionRequest() -> ChatCompletionRequest { + var messages: [ChatMessage] = [] + + // Add instructions as system message if present + if let instructions = instructions, !instructions.isEmpty { + messages.append(ChatMessage(role: "system", content: instructions)) + } + + // Convert input to messages + switch input { + case .text(let text): + messages.append(ChatMessage(role: "user", content: text)) + case .items(let items): + for item in items { + switch item { + case .message(let messageItem): + messages.append(ChatMessage(role: messageItem.role, content: messageItem.content.plainText)) + case .functionCallOutput(let outputItem): + messages.append( + ChatMessage( + role: "tool", + content: outputItem.output, + tool_calls: nil, + tool_call_id: outputItem.call_id + ) + ) + } + } + } + + // Convert tools + var openAITools: [Tool]? = nil + if let tools = tools { + openAITools = tools.map { tool in + Tool( + type: "function", + function: ToolFunction( + name: tool.name, + description: tool.description, + parameters: tool.parameters + ) + ) + } + } + + // Convert tool choice + var openAIToolChoice: ToolChoiceOption? 
= nil + if let choice = tool_choice { + switch choice { + case .auto: + openAIToolChoice = .auto + case .none: + openAIToolChoice = ToolChoiceOption.none + case .required: + openAIToolChoice = .auto + case .function(let name): + openAIToolChoice = .function( + ToolChoiceOption.FunctionName( + type: "function", + function: ToolChoiceOption.Name(name: name) + ) + ) + } + } + + return ChatCompletionRequest( + model: model, + messages: messages, + temperature: temperature, + max_tokens: max_output_tokens, + stream: stream, + top_p: top_p, + frequency_penalty: nil, + presence_penalty: nil, + stop: nil, + n: nil, + tools: openAITools, + tool_choice: openAIToolChoice, + session_id: nil + ) + } +} + +extension ChatCompletionResponse { + /// Convert internal ChatCompletionResponse to Open Responses format + func toOpenResponsesResponse(responseId: String) -> OpenResponsesResponse { + var outputItems: [OpenResponsesOutputItem] = [] + + for choice in choices { + let itemId = "item_\(UUID().uuidString.replacingOccurrences(of: "-", with: "").prefix(24))" + + // Check for tool calls + if let toolCalls = choice.message.tool_calls, !toolCalls.isEmpty { + for toolCall in toolCalls { + let funcCall = OpenResponsesFunctionCall( + id: itemId, + status: .completed, + callId: toolCall.id, + name: toolCall.function.name, + arguments: toolCall.function.arguments + ) + outputItems.append(.functionCall(funcCall)) + } + } else if let content = choice.message.content { + // Regular text message + let outputMessage = OpenResponsesOutputMessage( + id: itemId, + status: .completed, + content: [.outputText(OpenResponsesOutputText(text: content))] + ) + outputItems.append(.message(outputMessage)) + } + } + + return OpenResponsesResponse( + id: responseId, + createdAt: created, + status: .completed, + model: model, + output: outputItems, + usage: OpenResponsesUsage( + inputTokens: usage.prompt_tokens, + outputTokens: usage.completion_tokens + ) + ) + } +} diff --git a/Packages/OsaurusCore/Models/RemoteProviderConfiguration.swift b/Packages/OsaurusCore/Models/RemoteProviderConfiguration.swift index 8d7076a2..349ee31b 100644 --- a/Packages/OsaurusCore/Models/RemoteProviderConfiguration.swift +++ b/Packages/OsaurusCore/Models/RemoteProviderConfiguration.swift @@ -36,11 +36,13 @@ public enum RemoteProviderAuthType: String, Codable, Sendable, CaseIterable { public enum RemoteProviderType: String, Codable, Sendable, CaseIterable { case openai = "openai" // OpenAI-compatible API (default) case anthropic = "anthropic" // Anthropic Messages API + case openResponses = "openResponses" // Open Responses API public var displayName: String { switch self { case .openai: return "OpenAI Compatible" case .anthropic: return "Anthropic" + case .openResponses: return "Open Responses" } } @@ -48,6 +50,7 @@ public enum RemoteProviderType: String, Codable, Sendable, CaseIterable { switch self { case .openai: return "/chat/completions" case .anthropic: return "/messages" + case .openResponses: return "/responses" } } @@ -228,7 +231,7 @@ public struct RemoteProvider: Codable, Identifiable, Sendable, Equatable { if headers["anthropic-version"] == nil { headers["anthropic-version"] = "2023-06-01" } - case .openai: + case .openai, .openResponses: headers["Authorization"] = "Bearer \(apiKey)" } } diff --git a/Packages/OsaurusCore/Models/ResponseWriters.swift b/Packages/OsaurusCore/Models/ResponseWriters.swift index 3bd5056b..18997a8e 100644 --- a/Packages/OsaurusCore/Models/ResponseWriters.swift +++ b/Packages/OsaurusCore/Models/ResponseWriters.swift 
@@ -554,3 +554,300 @@ final class AnthropicSSEResponseWriter { } } } + +// MARK: - Open Responses SSE Response Writer + +/// SSE Response Writer for Open Responses API format +/// Emits semantic events: response.created, response.output_item.added, response.output_text.delta, etc. +final class OpenResponsesSSEWriter { + private var responseId: String = "" + private var model: String = "" + private var inputTokens: Int = 0 + private var outputTokens: Int = 0 + private var sequenceNumber: Int = 0 + private var currentItemId: String = "" + private var currentOutputIndex: Int = 0 + private var accumulatedText: String = "" + + func writeHeaders(_ context: ChannelHandlerContext, extraHeaders: [(String, String)]? = nil) { + var head = HTTPResponseHead(version: .http1_1, status: .ok) + var headers = HTTPHeaders() + headers.add(name: "Content-Type", value: "text/event-stream") + headers.add(name: "Cache-Control", value: "no-cache, no-transform") + headers.add(name: "Connection", value: "keep-alive") + headers.add(name: "X-Accel-Buffering", value: "no") + headers.add(name: "Transfer-Encoding", value: "chunked") + if let extraHeaders { + for (n, v) in extraHeaders { headers.add(name: n, value: v) } + } + head.headers = headers + context.write(NIOAny(HTTPServerResponsePart.head(head)), promise: nil) + context.flush() + } + + /// Generate the next sequence number + private func nextSequenceNumber() -> Int { + sequenceNumber += 1 + return sequenceNumber + } + + /// Write response.created event to start the response + func writeResponseCreated( + responseId: String, + model: String, + inputTokens: Int, + context: ChannelHandlerContext + ) { + self.responseId = responseId + self.model = model + self.inputTokens = inputTokens + self.outputTokens = 0 + self.sequenceNumber = 0 + self.currentOutputIndex = 0 + self.accumulatedText = "" + + let response = OpenResponsesResponse( + id: responseId, + createdAt: Int(Date().timeIntervalSince1970), + status: .inProgress, + model: model, + output: [], + usage: nil + ) + let event = ResponseCreatedEvent(sequenceNumber: nextSequenceNumber(), response: response) + writeSSEEvent("response.created", payload: event, context: context) + } + + /// Write response.in_progress event + func writeResponseInProgress(context: ChannelHandlerContext) { + let response = OpenResponsesResponse( + id: responseId, + createdAt: Int(Date().timeIntervalSince1970), + status: .inProgress, + model: model, + output: [], + usage: nil + ) + let event = ResponseInProgressEvent(sequenceNumber: nextSequenceNumber(), response: response) + writeSSEEvent("response.in_progress", payload: event, context: context) + } + + /// Write response.output_item.added event for a new message item + func writeMessageItemAdded(itemId: String, context: ChannelHandlerContext) { + self.currentItemId = itemId + + let messageItem = OpenResponsesOutputMessage( + id: itemId, + status: .inProgress, + content: [] + ) + let event = OutputItemAddedEvent( + sequenceNumber: nextSequenceNumber(), + outputIndex: currentOutputIndex, + item: .message(messageItem) + ) + writeSSEEvent("response.output_item.added", payload: event, context: context) + } + + /// Write response.content_part.added event + func writeContentPartAdded(context: ChannelHandlerContext) { + let part = OpenResponsesOutputContent.outputText(OpenResponsesOutputText(text: "")) + let event = ContentPartAddedEvent( + sequenceNumber: nextSequenceNumber(), + itemId: currentItemId, + outputIndex: currentOutputIndex, + contentIndex: 0, + part: part + ) + 
writeSSEEvent("response.content_part.added", payload: event, context: context) + } + + /// Write response.output_text.delta event + @inline(__always) + func writeTextDelta(_ text: String, context: ChannelHandlerContext) { + guard !text.isEmpty else { return } + + accumulatedText += text + outputTokens += max(1, text.count / 4) + + let event = OutputTextDeltaEvent( + sequenceNumber: nextSequenceNumber(), + itemId: currentItemId, + outputIndex: currentOutputIndex, + contentIndex: 0, + delta: text + ) + writeSSEEvent("response.output_text.delta", payload: event, context: context) + } + + /// Write response.output_text.done event + func writeTextDone(context: ChannelHandlerContext) { + let event = OutputTextDoneEvent( + sequenceNumber: nextSequenceNumber(), + itemId: currentItemId, + outputIndex: currentOutputIndex, + contentIndex: 0, + text: accumulatedText + ) + writeSSEEvent("response.output_text.done", payload: event, context: context) + } + + /// Write response.output_item.done event for a completed message + func writeMessageItemDone(context: ChannelHandlerContext) { + let messageItem = OpenResponsesOutputMessage( + id: currentItemId, + status: .completed, + content: [.outputText(OpenResponsesOutputText(text: accumulatedText))] + ) + let event = OutputItemDoneEvent( + sequenceNumber: nextSequenceNumber(), + outputIndex: currentOutputIndex, + item: .message(messageItem) + ) + writeSSEEvent("response.output_item.done", payload: event, context: context) + currentOutputIndex += 1 + } + + /// Write response.output_item.added event for a function call + func writeFunctionCallItemAdded( + itemId: String, + callId: String, + name: String, + context: ChannelHandlerContext + ) { + self.currentItemId = itemId + + let functionCall = OpenResponsesFunctionCall( + id: itemId, + status: .inProgress, + callId: callId, + name: name, + arguments: "" + ) + let event = OutputItemAddedEvent( + sequenceNumber: nextSequenceNumber(), + outputIndex: currentOutputIndex, + item: .functionCall(functionCall) + ) + writeSSEEvent("response.output_item.added", payload: event, context: context) + } + + /// Write response.function_call_arguments.delta event + @inline(__always) + func writeFunctionCallArgumentsDelta( + callId: String, + delta: String, + context: ChannelHandlerContext + ) { + guard !delta.isEmpty else { return } + + accumulatedText += delta + + let event = FunctionCallArgumentsDeltaEvent( + sequenceNumber: nextSequenceNumber(), + itemId: currentItemId, + outputIndex: currentOutputIndex, + callId: callId, + delta: delta + ) + writeSSEEvent("response.function_call_arguments.delta", payload: event, context: context) + } + + /// Write response.function_call_arguments.done event + func writeFunctionCallArgumentsDone( + callId: String, + context: ChannelHandlerContext + ) { + let event = FunctionCallArgumentsDoneEvent( + sequenceNumber: nextSequenceNumber(), + itemId: currentItemId, + outputIndex: currentOutputIndex, + callId: callId, + arguments: accumulatedText + ) + writeSSEEvent("response.function_call_arguments.done", payload: event, context: context) + } + + /// Write response.output_item.done event for a function call + func writeFunctionCallItemDone( + callId: String, + name: String, + context: ChannelHandlerContext + ) { + let functionCall = OpenResponsesFunctionCall( + id: currentItemId, + status: .completed, + callId: callId, + name: name, + arguments: accumulatedText + ) + let event = OutputItemDoneEvent( + sequenceNumber: nextSequenceNumber(), + outputIndex: currentOutputIndex, + item: 
.functionCall(functionCall) + ) + writeSSEEvent("response.output_item.done", payload: event, context: context) + currentOutputIndex += 1 + } + + /// Write response.completed event + func writeResponseCompleted(context: ChannelHandlerContext) { + let response = OpenResponsesResponse( + id: responseId, + createdAt: Int(Date().timeIntervalSince1970), + status: .completed, + model: model, + output: [], + usage: OpenResponsesUsage(inputTokens: inputTokens, outputTokens: outputTokens) + ) + let event = ResponseCompletedEvent(sequenceNumber: nextSequenceNumber(), response: response) + writeSSEEvent("response.completed", payload: event, context: context) + } + + /// Write error event + func writeError(_ message: String, context: ChannelHandlerContext) { + let response = OpenResponsesResponse( + id: responseId, + createdAt: Int(Date().timeIntervalSince1970), + status: .failed, + model: model, + output: [], + usage: nil + ) + let error = OpenResponsesError(code: "internal_error", message: message) + let event = ResponseFailedEvent(sequenceNumber: nextSequenceNumber(), response: response, error: error) + writeSSEEvent("response.failed", payload: event, context: context) + } + + /// End the stream with [DONE] marker and close connection + func writeEnd(_ context: ChannelHandlerContext) { + var tail = context.channel.allocator.buffer(capacity: 16) + tail.writeString("data: [DONE]\n\n") + context.write(NIOAny(HTTPServerResponsePart.body(.byteBuffer(tail))), promise: nil) + let ctx = NIOLoopBound(context, eventLoop: context.eventLoop) + context.writeAndFlush(NIOAny(HTTPServerResponsePart.end(nil as HTTPHeaders?))).whenComplete { + _ in + ctx.value.close(promise: nil) + } + } + + // MARK: - Private Helpers + + @inline(__always) + private func writeSSEEvent(_ eventType: String, payload: T, context: ChannelHandlerContext) { + let encoder = IkigaJSONEncoder() + var buffer = context.channel.allocator.buffer(capacity: 512) + buffer.writeString("event: ") + buffer.writeString(eventType) + buffer.writeString("\ndata: ") + do { + try encoder.encodeAndWrite(payload, into: &buffer) + buffer.writeString("\n\n") + context.write(NIOAny(HTTPServerResponsePart.body(.byteBuffer(buffer))), promise: nil) + context.flush() + } catch { + print("Error encoding Open Responses SSE event: \(error)") + context.close(promise: nil) + } + } +} diff --git a/Packages/OsaurusCore/Networking/HTTPHandler.swift b/Packages/OsaurusCore/Networking/HTTPHandler.swift index 683446fa..dd86c767 100644 --- a/Packages/OsaurusCore/Networking/HTTPHandler.swift +++ b/Packages/OsaurusCore/Networking/HTTPHandler.swift @@ -200,6 +200,8 @@ final class HTTPHandler: ChannelInboundHandler, Sendable { handleAnthropicMessages(head: head, context: context, startTime: startTime, userAgent: userAgent) } else if head.method == .POST, path == "/audio/transcriptions" { handleAudioTranscriptions(head: head, context: context, startTime: startTime, userAgent: userAgent) + } else if head.method == .POST, path == "/responses" { + handleOpenResponses(head: head, context: context, startTime: startTime, userAgent: userAgent) } else { var headers = [("Content-Type", "text/plain; charset=utf-8")] headers.append(contentsOf: stateRef.value.corsHeaders) @@ -2048,6 +2050,421 @@ final class HTTPHandler: ChannelInboundHandler, Sendable { } } + // MARK: - Open Responses API + + private func handleOpenResponses( + head: HTTPRequestHead, + context: ChannelHandlerContext, + startTime: Date, + userAgent: String? + ) { + let data: Data + let requestBodyString: String? 
+ if let body = stateRef.value.requestBodyBuffer { + var bodyCopy = body + let bytes = bodyCopy.readBytes(length: bodyCopy.readableBytes) ?? [] + data = Data(bytes) + requestBodyString = String(decoding: data, as: UTF8.self) + } else { + data = Data() + requestBodyString = nil + } + + // Parse Open Responses request + guard let openResponsesReq = try? JSONDecoder().decode(OpenResponsesRequest.self, from: data) else { + let error = OpenResponsesErrorResponse(code: "invalid_request_error", message: "Invalid request format") + let errorJson = + (try? JSONEncoder().encode(error)).map { String(decoding: $0, as: UTF8.self) } + ?? #"{"error":{"type":"error","code":"invalid_request_error","message":"Invalid request format"}}"# + var headers = [("Content-Type", "application/json; charset=utf-8")] + headers.append(contentsOf: stateRef.value.corsHeaders) + sendResponse( + context: context, + version: head.version, + status: .badRequest, + headers: headers, + body: errorJson + ) + logRequest( + method: "POST", + path: "/responses", + userAgent: userAgent, + requestBody: requestBodyString, + responseStatus: 400, + startTime: startTime, + errorMessage: "Invalid request format" + ) + return + } + + // Convert to internal format + let internalReq = openResponsesReq.toChatCompletionRequest() + + // Generate response ID + let responseId = "resp_\(UUID().uuidString.replacingOccurrences(of: "-", with: "").prefix(24))" + let model = openResponsesReq.model + + // Determine if streaming + let wantsStream = openResponsesReq.stream ?? false + + if wantsStream { + handleOpenResponsesStreaming( + request: openResponsesReq, + internalReq: internalReq, + responseId: responseId, + model: model, + context: context, + startTime: startTime, + userAgent: userAgent, + requestBodyString: requestBodyString + ) + } else { + handleOpenResponsesNonStreaming( + internalReq: internalReq, + responseId: responseId, + model: model, + head: head, + context: context, + startTime: startTime, + userAgent: userAgent, + requestBodyString: requestBodyString + ) + } + } + + private func handleOpenResponsesStreaming( + request: OpenResponsesRequest, + internalReq: ChatCompletionRequest, + responseId: String, + model: String, + context: ChannelHandlerContext, + startTime: Date, + userAgent: String?, + requestBodyString: String? + ) { + let writer = OpenResponsesSSEWriter() + let cors = stateRef.value.corsHeaders + let loop = context.eventLoop + let writerBound = NIOLoopBound(writer, eventLoop: loop) + let ctx = NIOLoopBound(context, eventLoop: loop) + let chatEngine = self.chatEngine + let hop: (@escaping @Sendable () -> Void) -> Void = { block in + if loop.inEventLoop { block() } else { loop.execute { block() } } + } + + // Estimate input tokens (rough: 1 token per 4 chars) + let inputTokens: Int = + { + switch request.input { + case .text(let text): + return max(1, text.count / 4) + case .items(let items): + return items.reduce(0) { acc, item in + switch item { + case .message(let msg): + return acc + max(1, msg.content.plainText.count / 4) + case .functionCallOutput(let output): + return acc + max(1, output.output.count / 4) + } + } + } + }() + (request.instructions?.count ?? 
0) / 4 + + let itemId = "item_\(UUID().uuidString.replacingOccurrences(of: "-", with: "").prefix(24))" + + // Send headers and initial events + hop { + writerBound.value.writeHeaders(ctx.value, extraHeaders: cors) + writerBound.value.writeResponseCreated( + responseId: responseId, + model: model, + inputTokens: inputTokens, + context: ctx.value + ) + writerBound.value.writeResponseInProgress(context: ctx.value) + writerBound.value.writeMessageItemAdded(itemId: itemId, context: ctx.value) + writerBound.value.writeContentPartAdded(context: ctx.value) + } + + // Capture for logging + let logStartTime = startTime + let logUserAgent = userAgent + let logRequestBody = requestBodyString + let logModel = model + let logSelf = self + + Task(priority: .userInitiated) { + do { + let stream = try await chatEngine.streamChat(request: internalReq) + for try await delta in stream { + hop { + writerBound.value.writeTextDelta(delta, context: ctx.value) + } + } + hop { + writerBound.value.writeTextDone(context: ctx.value) + writerBound.value.writeMessageItemDone(context: ctx.value) + writerBound.value.writeResponseCompleted(context: ctx.value) + writerBound.value.writeEnd(ctx.value) + } + logSelf.logRequest( + method: "POST", + path: "/responses", + userAgent: logUserAgent, + requestBody: logRequestBody, + responseStatus: 200, + startTime: logStartTime, + model: logModel, + finishReason: .stop + ) + } catch let inv as ServiceToolInvocation { + // Handle tool invocation - emit function_call item + let callId = + inv.toolCallId ?? "call_\(UUID().uuidString.replacingOccurrences(of: "-", with: "").prefix(24))" + let funcItemId = "item_\(UUID().uuidString.replacingOccurrences(of: "-", with: "").prefix(24))" + let args = inv.jsonArguments + + hop { + // Close the text content if any was written + writerBound.value.writeTextDone(context: ctx.value) + writerBound.value.writeMessageItemDone(context: ctx.value) + + // Start function call item + writerBound.value.writeFunctionCallItemAdded( + itemId: funcItemId, + callId: callId, + name: inv.toolName, + context: ctx.value + ) + + // Stream the arguments in chunks + let chunkSize = 512 + var i = args.startIndex + while i < args.endIndex { + let next = args.index(i, offsetBy: chunkSize, limitedBy: args.endIndex) ?? 
args.endIndex + let chunk = String(args[i ..< next]) + writerBound.value.writeFunctionCallArgumentsDelta( + callId: callId, + delta: chunk, + context: ctx.value + ) + i = next + } + + // Complete the function call + writerBound.value.writeFunctionCallArgumentsDone(callId: callId, context: ctx.value) + writerBound.value.writeFunctionCallItemDone( + callId: callId, + name: inv.toolName, + context: ctx.value + ) + writerBound.value.writeResponseCompleted(context: ctx.value) + writerBound.value.writeEnd(ctx.value) + } + + let toolLog = ToolCallLog(name: inv.toolName, arguments: inv.jsonArguments) + logSelf.logRequest( + method: "POST", + path: "/responses", + userAgent: logUserAgent, + requestBody: logRequestBody, + responseStatus: 200, + startTime: logStartTime, + model: logModel, + toolCalls: [toolLog], + finishReason: .toolCalls + ) + } catch { + hop { + writerBound.value.writeError(error.localizedDescription, context: ctx.value) + writerBound.value.writeEnd(ctx.value) + } + logSelf.logRequest( + method: "POST", + path: "/responses", + userAgent: logUserAgent, + requestBody: logRequestBody, + responseStatus: 500, + startTime: logStartTime, + model: logModel, + finishReason: .error, + errorMessage: error.localizedDescription + ) + } + } + } + + private func handleOpenResponsesNonStreaming( + internalReq: ChatCompletionRequest, + responseId: String, + model: String, + head: HTTPRequestHead, + context: ChannelHandlerContext, + startTime: Date, + userAgent: String?, + requestBodyString: String? + ) { + let cors = stateRef.value.corsHeaders + let loop = context.eventLoop + let ctx = NIOLoopBound(context, eventLoop: loop) + let chatEngine = self.chatEngine + let hop: (@escaping @Sendable () -> Void) -> Void = { block in + if loop.inEventLoop { block() } else { loop.execute { block() } } + } + + // Capture for logging + let logStartTime = startTime + let logUserAgent = userAgent + let logRequestBody = requestBodyString + let logModel = model + let logSelf = self + + Task(priority: .userInitiated) { + do { + let resp = try await chatEngine.completeChat(request: internalReq) + + // Convert to Open Responses format + let openResponsesResp = resp.toOpenResponsesResponse(responseId: responseId) + + let json = try JSONEncoder().encode(openResponsesResp) + var headers: [(String, String)] = [("Content-Type", "application/json")] + headers.append(contentsOf: cors) + let headersCopy = headers + let body = String(decoding: json, as: UTF8.self) + + hop { + var responseHead = HTTPResponseHead(version: head.version, status: .ok) + var buffer = ctx.value.channel.allocator.buffer(capacity: body.utf8.count) + buffer.writeString(body) + var nioHeaders = HTTPHeaders() + for (name, value) in headersCopy { nioHeaders.add(name: name, value: value) } + nioHeaders.add(name: "Content-Length", value: String(buffer.readableBytes)) + nioHeaders.add(name: "Connection", value: "close") + responseHead.headers = nioHeaders + let c = ctx.value + c.write(NIOAny(HTTPServerResponsePart.head(responseHead)), promise: nil) + c.write(NIOAny(HTTPServerResponsePart.body(.byteBuffer(buffer))), promise: nil) + c.writeAndFlush(NIOAny(HTTPServerResponsePart.end(nil as HTTPHeaders?))).whenComplete { _ in + ctx.value.close(promise: nil) + } + } + + logSelf.logRequest( + method: "POST", + path: "/responses", + userAgent: logUserAgent, + requestBody: logRequestBody, + responseBody: body, + responseStatus: 200, + startTime: logStartTime, + model: logModel, + tokensInput: resp.usage.prompt_tokens, + tokensOutput: resp.usage.completion_tokens, + 
finishReason: .stop + ) + } catch let inv as ServiceToolInvocation { + // Handle tool invocation for non-streaming + let callId = + inv.toolCallId ?? "call_\(UUID().uuidString.replacingOccurrences(of: "-", with: "").prefix(24))" + let itemId = "item_\(UUID().uuidString.replacingOccurrences(of: "-", with: "").prefix(24))" + + let functionCall = OpenResponsesFunctionCall( + id: itemId, + status: .completed, + callId: callId, + name: inv.toolName, + arguments: inv.jsonArguments + ) + + let openResponsesResp = OpenResponsesResponse( + id: responseId, + createdAt: Int(Date().timeIntervalSince1970), + status: .completed, + model: model, + output: [.functionCall(functionCall)], + usage: OpenResponsesUsage(inputTokens: 0, outputTokens: 0) + ) + + let json = + (try? JSONEncoder().encode(openResponsesResp)) + .map { String(decoding: $0, as: UTF8.self) } ?? "{}" + var headers: [(String, String)] = [("Content-Type", "application/json")] + headers.append(contentsOf: cors) + let headersCopy = headers + let body = json + + hop { + var responseHead = HTTPResponseHead(version: head.version, status: .ok) + var buffer = ctx.value.channel.allocator.buffer(capacity: body.utf8.count) + buffer.writeString(body) + var nioHeaders = HTTPHeaders() + for (name, value) in headersCopy { nioHeaders.add(name: name, value: value) } + nioHeaders.add(name: "Content-Length", value: String(buffer.readableBytes)) + nioHeaders.add(name: "Connection", value: "close") + responseHead.headers = nioHeaders + let c = ctx.value + c.write(NIOAny(HTTPServerResponsePart.head(responseHead)), promise: nil) + c.write(NIOAny(HTTPServerResponsePart.body(.byteBuffer(buffer))), promise: nil) + c.writeAndFlush(NIOAny(HTTPServerResponsePart.end(nil as HTTPHeaders?))).whenComplete { _ in + ctx.value.close(promise: nil) + } + } + + let toolLog = ToolCallLog(name: inv.toolName, arguments: inv.jsonArguments) + logSelf.logRequest( + method: "POST", + path: "/responses", + userAgent: logUserAgent, + requestBody: logRequestBody, + responseBody: body, + responseStatus: 200, + startTime: logStartTime, + model: logModel, + toolCalls: [toolLog], + finishReason: .toolCalls + ) + } catch { + let errorResp = OpenResponsesErrorResponse(code: "api_error", message: error.localizedDescription) + let errorJson = + (try? JSONEncoder().encode(errorResp)) + .map { String(decoding: $0, as: UTF8.self) } + ?? 
#"{"error":{"type":"error","code":"api_error","message":"Internal error"}}"# + var headers: [(String, String)] = [("Content-Type", "application/json")] + headers.append(contentsOf: cors) + let headersCopy = headers + let body = errorJson + + hop { + var responseHead = HTTPResponseHead(version: head.version, status: .internalServerError) + var buffer = ctx.value.channel.allocator.buffer(capacity: body.utf8.count) + buffer.writeString(body) + var nioHeaders = HTTPHeaders() + for (name, value) in headersCopy { nioHeaders.add(name: name, value: value) } + nioHeaders.add(name: "Content-Length", value: String(buffer.readableBytes)) + nioHeaders.add(name: "Connection", value: "close") + responseHead.headers = nioHeaders + let c = ctx.value + c.write(NIOAny(HTTPServerResponsePart.head(responseHead)), promise: nil) + c.write(NIOAny(HTTPServerResponsePart.body(.byteBuffer(buffer))), promise: nil) + c.writeAndFlush(NIOAny(HTTPServerResponsePart.end(nil as HTTPHeaders?))).whenComplete { _ in + ctx.value.close(promise: nil) + } + } + + logSelf.logRequest( + method: "POST", + path: "/responses", + userAgent: logUserAgent, + requestBody: logRequestBody, + responseStatus: 500, + startTime: logStartTime, + model: logModel, + errorMessage: error.localizedDescription + ) + } + } + } + // MARK: - Multipart Form Data Parsing private func extractBoundary(from contentType: String) -> String? { diff --git a/Packages/OsaurusCore/Services/RemoteProviderManager.swift b/Packages/OsaurusCore/Services/RemoteProviderManager.swift index cdcbba80..2d4921be 100644 --- a/Packages/OsaurusCore/Services/RemoteProviderManager.swift +++ b/Packages/OsaurusCore/Services/RemoteProviderManager.swift @@ -367,7 +367,7 @@ public final class RemoteProviderManager: ObservableObject { if testHeaders["anthropic-version"] == nil { testHeaders["anthropic-version"] = "2023-06-01" } - case .openai: + case .openai, .openResponses: testHeaders["Authorization"] = "Bearer \(apiKey)" } } diff --git a/Packages/OsaurusCore/Services/RemoteProviderService.swift b/Packages/OsaurusCore/Services/RemoteProviderService.swift index 724506e5..f3921e38 100644 --- a/Packages/OsaurusCore/Services/RemoteProviderService.swift +++ b/Packages/OsaurusCore/Services/RemoteProviderService.swift @@ -336,6 +336,76 @@ public actor RemoteProviderService: ToolCapableService { break } } + } else if providerType == .openResponses { + // Parse Open Responses SSE event + if let eventType = try? JSONDecoder().decode( + OpenResponsesSSEEvent.self, + from: jsonData + ) { + switch eventType.type { + case "response.output_text.delta": + if let deltaEvent = try? JSONDecoder().decode( + OutputTextDeltaEvent.self, + from: jsonData + ) { + var output = deltaEvent.delta + for seq in stopSequences { + if let range = output.range(of: seq) { + output = String(output[.. OpenResponsesRequest { + var inputItems: [OpenResponsesInputItem] = [] + var instructions: String? 
= nil + + for msg in messages { + switch msg.role { + case "system": + // System messages become instructions + if let content = msg.content { + if let existing = instructions { + instructions = existing + "\n" + content + } else { + instructions = content + } + } + + case "user": + // User messages become message input items + if let content = msg.content { + let msgContent = OpenResponsesMessageContent.text(content) + inputItems.append(.message(OpenResponsesMessageItem(role: "user", content: msgContent))) + } + + case "assistant": + // Assistant messages with tool calls need special handling + if let toolCalls = msg.tool_calls, !toolCalls.isEmpty { + // First add any text content + if let content = msg.content, !content.isEmpty { + let msgContent = OpenResponsesMessageContent.text(content) + inputItems.append(.message(OpenResponsesMessageItem(role: "assistant", content: msgContent))) + } + // Note: function_call items from assistant are not input items in Open Responses + // They would be represented as prior output from the assistant + } else if let content = msg.content { + let msgContent = OpenResponsesMessageContent.text(content) + inputItems.append(.message(OpenResponsesMessageItem(role: "assistant", content: msgContent))) + } + + case "tool": + // Tool results become function_call_output items + if let toolCallId = msg.tool_call_id, let content = msg.content { + inputItems.append( + .functionCallOutput( + OpenResponsesFunctionCallOutputItem( + callId: toolCallId, + output: content + ) + ) + ) + } + + default: + // Unknown role - treat as user message + if let content = msg.content { + let msgContent = OpenResponsesMessageContent.text(content) + inputItems.append(.message(OpenResponsesMessageItem(role: "user", content: msgContent))) + } + } + } + + // Convert tools + var openResponsesTools: [OpenResponsesTool]? = nil + if let tools = tools { + openResponsesTools = tools.map { tool in + OpenResponsesTool( + name: tool.function.name, + description: tool.function.description, + parameters: tool.function.parameters + ) + } + } + + // Convert tool choice + var openResponsesToolChoice: OpenResponsesToolChoice? 
= nil + if let choice = tool_choice { + switch choice { + case .auto: + openResponsesToolChoice = .auto + case .none: + openResponsesToolChoice = OpenResponsesToolChoice.none + case .function(let fn): + openResponsesToolChoice = .function(name: fn.function.name) + } + } + + // Determine input format + let input: OpenResponsesInput + if inputItems.count == 1, case .message(let msg) = inputItems[0], msg.role == "user" { + // Single user message - use text shorthand + input = .text(msg.content.plainText) + } else { + input = .items(inputItems) + } + + return OpenResponsesRequest( + model: model, + input: input, + stream: stream, + tools: openResponsesTools, + tool_choice: openResponsesToolChoice, + temperature: temperature, + max_output_tokens: max_completion_tokens, + top_p: top_p, + instructions: instructions, + previous_response_id: nil, + metadata: nil + ) + } } // MARK: - Static Factory for Creating Services diff --git a/Packages/OsaurusCore/Views/ServerView.swift b/Packages/OsaurusCore/Views/ServerView.swift index 009ecaac..0122cd94 100644 --- a/Packages/OsaurusCore/Views/ServerView.swift +++ b/Packages/OsaurusCore/Views/ServerView.swift @@ -595,6 +595,20 @@ struct APIEndpoint { } """ ), + APIEndpoint( + method: "POST", + path: "/responses", + description: "Responses endpoint with streaming support", + compatibility: "Open Responses", + category: .chat, + examplePayload: """ + { + "model": "\(model)", + "input": "Hello!", + "stream": false + } + """ + ), // Audio endpoints APIEndpoint( method: "POST", diff --git a/README.md b/README.md index 07413b03..1511b87b 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ Osaurus is the AI edge runtime for macOS. It brings together: | **Remote Providers** | Anthropic, OpenAI, OpenRouter, Ollama, LM Studio, or custom | | **OpenAI Compatible** | `/v1/chat/completions` with streaming and tool calling | | **Anthropic Compatible** | `/messages` endpoint for Claude Code and Anthropic SDK clients | +| **Open Responses** | `/responses` endpoint for multi-provider interoperability | | **MCP Server** | Connect to Cursor, Claude Desktop, and other MCP clients | | **Remote MCP Providers** | Aggregate tools from external MCP servers | | **Tools & Plugins** | Browser automation, file system, git, web search, and more | @@ -355,12 +356,13 @@ Base URL: `http://127.0.0.1:1337` (or your configured port) | Endpoint | Description | | --------------------------- | ----------------------------------- | -| `GET /health` | Server health | -| `GET /v1/models` | List models (OpenAI format) | -| `GET /v1/tags` | List models (Ollama format) | -| `POST /v1/chat/completions` | Chat completions (OpenAI format) | -| `POST /messages` | Chat completions (Anthropic format) | -| `POST /chat` | Chat (Ollama format, NDJSON) | +| `GET /health` | Server health | +| `GET /v1/models` | List models (OpenAI format) | +| `GET /v1/tags` | List models (Ollama format) | +| `POST /v1/chat/completions` | Chat completions (OpenAI format) | +| `POST /messages` | Chat completions (Anthropic format) | +| `POST /responses` | Responses (Open Responses format) | +| `POST /chat` | Chat (Ollama format, NDJSON) | All endpoints support `/v1`, `/api`, and `/v1/api` prefixes. diff --git a/docs/DEVELOPER_TOOLS.md b/docs/DEVELOPER_TOOLS.md index cad16d57..2d731c3d 100644 --- a/docs/DEVELOPER_TOOLS.md +++ b/docs/DEVELOPER_TOOLS.md @@ -120,17 +120,18 @@ Copy the server URL with one click for use in your applications. 
Browse all available endpoints, organized by category: -| Category | Endpoints | -| -------- | ---------------------------------------- | -| **Core** | `/`, `/health`, `/models`, `/tags` | -| **Chat** | `/chat/completions`, `/chat` | -| **MCP** | `/mcp/health`, `/mcp/tools`, `/mcp/call` | +| Category | Endpoints | +| --------- | ------------------------------------------------------ | +| **Core** | `/`, `/health`, `/models`, `/tags` | +| **Chat** | `/chat/completions`, `/chat`, `/messages`, `/responses` | +| **Audio** | `/audio/transcriptions` | +| **MCP** | `/mcp/health`, `/mcp/tools`, `/mcp/call` | Each endpoint shows: - HTTP method (GET/POST) - Path -- Compatibility badge (OpenAI, Ollama, MCP) +- Compatibility badge (OpenAI, Ollama, Anthropic, Open Responses, MCP) - Description #### Interactive Testing diff --git a/docs/FEATURES.md b/docs/FEATURES.md index 012184ab..f658b1c8 100644 --- a/docs/FEATURES.md +++ b/docs/FEATURES.md @@ -27,6 +27,7 @@ Canonical reference for all Osaurus features, their status, and documentation. | Shared Configuration | Stable | - | SHARED_CONFIGURATION_GUIDE.md | Services/SharedConfigurationService.swift | | OpenAI API Compatibility | Stable | "API Endpoints" | OpenAI_API_GUIDE.md | Networking/HTTPHandler.swift, Models/OpenAIAPI.swift | | Anthropic API Compatibility | Stable | "API Endpoints" | (in README) | Networking/HTTPHandler.swift, Models/AnthropicAPI.swift | +| Open Responses API | Stable | "API Endpoints" | OpenAI_API_GUIDE.md | Networking/HTTPHandler.swift, Models/OpenResponsesAPI.swift | | Ollama API Compatibility | Stable | "API Endpoints" | (in README) | Networking/HTTPHandler.swift | | Voice Input (WhisperKit) | Stable | "Voice Input" | VOICE_INPUT.md | Services/WhisperKitService.swift, Managers/WhisperModelManager.swift | | VAD Mode | Stable | "Voice Input" | VOICE_INPUT.md | Services/VADService.swift, Views/ContentView.swift (VAD controls) | @@ -230,6 +231,42 @@ Canonical reference for all Osaurus features, their status, and documentation. --- +### Open Responses API + +**Purpose:** Provide [Open Responses](https://www.openresponses.org) API compatibility for multi-provider interoperability. + +**Components:** + +- `Models/OpenResponsesAPI.swift` — Open Responses request/response models and streaming events +- `Models/ResponseWriters.swift` — SSE streaming for Open Responses format +- `Networking/HTTPHandler.swift` — `/responses` endpoint handler +- `Services/RemoteProviderService.swift` — Remote Open Responses provider support + +**Features:** + +- Full Responses API support (`/responses` endpoint) +- Streaming with semantic events (`response.output_text.delta`, `response.completed`, etc.) +- Non-streaming responses +- Tool/function calling support +- Input as simple string or structured items +- Instructions (system prompt) support +- Connect to remote Open Responses-compatible providers + +**Streaming Events:** + +| Event | Description | +|-------|-------------| +| `response.created` | Response object created | +| `response.in_progress` | Generation started | +| `response.output_item.added` | New output item (message or function call) | +| `response.output_text.delta` | Text content delta | +| `response.output_text.done` | Text content completed | +| `response.function_call_arguments.delta` | Function arguments delta | +| `response.output_item.done` | Output item completed | +| `response.completed` | Response finished | + +--- + ### Custom Themes **Purpose:** Customize the chat interface appearance with custom color schemes and styling. 
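The streaming events tabulated in the Open Responses feature section above arrive as standard SSE frames, so a client only needs to track the `event:` field and decode the `data:` payloads it cares about. Below is a minimal consumer sketch — it is not part of this patch, and the base URL and model name are placeholders for your own setup:

```swift
import Foundation

/// Client-side mirror of the delta event payload documented above.
/// Illustrative only; this struct is not part of the Osaurus codebase.
struct OutputTextDelta: Decodable {
    let type: String
    let delta: String
}

/// Minimal sketch: stream the local /responses endpoint and print text deltas.
func printStreamedResponse() async throws {
    var request = URLRequest(url: URL(string: "http://127.0.0.1:1337/responses")!)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.httpBody = try JSONSerialization.data(withJSONObject: [
        "model": "llama-3.2-3b-instruct",
        "input": "Tell me a short story",
        "stream": true,
    ])

    let (bytes, _) = try await URLSession.shared.bytes(for: request)
    var currentEvent = ""
    for try await line in bytes.lines {
        if line.hasPrefix("event: ") {
            // Remember which semantic event the next data: line belongs to.
            currentEvent = String(line.dropFirst("event: ".count))
        } else if line.hasPrefix("data: ") {
            let payload = String(line.dropFirst("data: ".count))
            if payload == "[DONE]" { break }  // stream terminator
            if currentEvent == "response.output_text.delta",
               let event = try? JSONDecoder().decode(OutputTextDelta.self, from: Data(payload.utf8)) {
                print(event.delta, terminator: "")
            }
        }
        // Blank lines separate SSE frames; they need no handling here.
    }
}
```

Tool calls can be consumed the same way: match `response.function_call_arguments.delta` frames and accumulate their `delta` fields until the corresponding `done` event arrives.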
diff --git a/docs/OpenAI_API_GUIDE.md b/docs/OpenAI_API_GUIDE.md
index 7e695842..e47bf46e 100644
--- a/docs/OpenAI_API_GUIDE.md
+++ b/docs/OpenAI_API_GUIDE.md
@@ -1,6 +1,6 @@
-# OpenAI API Compatible Endpoints
+# API Endpoints Guide
 
-This guide explains how to use the OpenAI-compatible API endpoints in Osaurus.
+This guide explains how to use the API endpoints in Osaurus, including OpenAI-compatible, Anthropic-compatible, and Open Responses formats.
 
 ## Available Endpoints
 
@@ -329,6 +329,189 @@ for chunk in stream:
     print(chunk.choices[0].delta.content, end="")
 ```
 
+## Open Responses API
+
+Osaurus supports the [Open Responses](https://www.openresponses.org) specification, providing a semantic, item-based API format for multi-provider interoperability.
+
+### 3. Responses - `POST /responses`
+
+Generate responses using the Open Responses format.
+
+#### Non-streaming Request
+
+```bash
+curl http://127.0.0.1:1337/responses \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "llama-3.2-3b-instruct",
+    "input": "Hello, how are you?",
+    "instructions": "You are a helpful assistant."
+  }'
+```
+
+Example response:
+
+```json
+{
+  "id": "resp_abc123",
+  "object": "response",
+  "created_at": 1738193123,
+  "status": "completed",
+  "model": "llama-3.2-3b-instruct",
+  "output": [
+    {
+      "type": "message",
+      "id": "item_xyz789",
+      "status": "completed",
+      "role": "assistant",
+      "content": [
+        {
+          "type": "output_text",
+          "text": "I'm doing well, thank you for asking! How can I help you today?"
+        }
+      ]
+    }
+  ],
+  "usage": {
+    "input_tokens": 20,
+    "output_tokens": 15,
+    "total_tokens": 35
+  }
+}
+```
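+
+For illustration, the same request can be made from Python using only the standard library (a sketch; error handling omitted):
+
+```python
+import json
+import urllib.request
+
+payload = {
+    "model": "llama-3.2-3b-instruct",
+    "input": "Hello, how are you?",
+    "instructions": "You are a helpful assistant.",
+}
+
+req = urllib.request.Request(
+    "http://127.0.0.1:1337/responses",
+    data=json.dumps(payload).encode("utf-8"),
+    headers={"Content-Type": "application/json"},
+)
+
+with urllib.request.urlopen(req) as resp:
+    body = json.load(resp)
+
+# Collect the text parts from message items in the output array.
+text = "".join(
+    part["text"]
+    for item in body["output"]
+    if item["type"] == "message"
+    for part in item["content"]
+    if part["type"] == "output_text"
+)
+print(text)
+```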
"llama-3.2-3b-instruct", + "input": "What is the weather in San Francisco?", + "tools": [ + { + "type": "function", + "name": "get_weather", + "description": "Get weather by city name", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"] + } + } + ] + }' +``` + +Tool call response: + +```json +{ + "id": "resp_abc123", + "object": "response", + "status": "completed", + "output": [ + { + "type": "function_call", + "id": "item_xyz", + "status": "completed", + "call_id": "call_123", + "name": "get_weather", + "arguments": "{\"city\":\"San Francisco\"}" + } + ] +} +``` + +To continue after a tool call, include the function output: + +```bash +curl http://127.0.0.1:1337/responses \ + -H "Content-Type: application/json" \ + -d '{ + "model": "llama-3.2-3b-instruct", + "input": [ + {"type": "message", "role": "user", "content": "What is the weather in SF?"}, + {"type": "function_call_output", "call_id": "call_123", "output": "{\"temp\": 65, \"conditions\": \"Foggy\"}"} + ] + }' +``` + +### Open Responses Request Parameters + +| Parameter | Type | Description | +|-----------|------|-------------| +| `model` | string | Model identifier (required) | +| `input` | string or array | Input text or array of input items (required) | +| `stream` | boolean | Enable streaming (default: false) | +| `instructions` | string | System prompt | +| `tools` | array | Available tools/functions | +| `tool_choice` | string/object | Tool selection mode ("auto", "none", "required") | +| `temperature` | float | Sampling temperature | +| `max_output_tokens` | integer | Maximum tokens to generate | +| `top_p` | float | Top-p sampling parameter | + +--- + ## Notes 1. **Model Availability**: Only models that have been downloaded through the Osaurus UI will be available via the API. diff --git a/docs/REMOTE_PROVIDERS.md b/docs/REMOTE_PROVIDERS.md index d605f34c..f8d19ad4 100644 --- a/docs/REMOTE_PROVIDERS.md +++ b/docs/REMOTE_PROVIDERS.md @@ -1,6 +1,6 @@ # Remote Providers -Remote Providers allow you to connect Osaurus to external APIs (Anthropic, OpenAI, and compatible endpoints), giving you access to cloud models alongside your local MLX models. +Remote Providers allow you to connect Osaurus to external APIs (OpenAI, Anthropic, Open Responses, and compatible endpoints), giving you access to cloud models alongside your local MLX models. 
+
+### Open Responses Request Parameters
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `model` | string | Model identifier (required) |
+| `input` | string or array | Input text or array of input items (required) |
+| `stream` | boolean | Enable streaming (default: false) |
+| `instructions` | string | System prompt |
+| `tools` | array | Available tools/functions |
+| `tool_choice` | string/object | Tool selection mode ("auto", "none", "required") |
+| `temperature` | float | Sampling temperature |
+| `max_output_tokens` | integer | Maximum tokens to generate |
+| `top_p` | float | Top-p sampling parameter |
+
+---
+
 ## Notes
 
 1. **Model Availability**: Only models that have been downloaded through the Osaurus UI will be available via the API.
diff --git a/docs/REMOTE_PROVIDERS.md b/docs/REMOTE_PROVIDERS.md
index d605f34c..f8d19ad4 100644
--- a/docs/REMOTE_PROVIDERS.md
+++ b/docs/REMOTE_PROVIDERS.md
@@ -1,6 +1,6 @@
 # Remote Providers
 
-Remote Providers allow you to connect Osaurus to external APIs (Anthropic, OpenAI, and compatible endpoints), giving you access to cloud models alongside your local MLX models.
+Remote Providers allow you to connect Osaurus to external APIs (OpenAI, Anthropic, Open Responses, and compatible endpoints), giving you access to cloud models alongside your local MLX models.
 
 ---
 
@@ -30,14 +30,23 @@ With Remote Providers, you can:
 
 Osaurus includes presets for common providers:
 
-| Preset         | Host              | Port          | Base Path | Auth             |
-| -------------- | ----------------- | ------------- | --------- | ---------------- |
-| **Anthropic**  | api.anthropic.com | 443           | /v1       | API Key required |
-| **OpenAI**     | api.openai.com    | 443           | /v1       | API Key required |
-| **OpenRouter** | openrouter.ai     | 443           | /api/v1   | API Key required |
-| **Ollama**     | localhost         | 11434         | /v1       | None             |
-| **LM Studio**  | localhost         | 1234          | /v1       | None             |
-| **Custom**     | (you specify)     | (you specify) | /v1       | Optional         |
+| Preset             | Host              | Port          | Base Path | API Format      | Auth             |
+| ------------------ | ----------------- | ------------- | --------- | --------------- | ---------------- |
+| **OpenAI**         | api.openai.com    | 443           | /v1       | OpenAI          | API Key required |
+| **Anthropic**      | api.anthropic.com | 443           | /v1       | Anthropic       | API Key required |
+| **OpenRouter**     | openrouter.ai     | 443           | /api/v1   | OpenAI          | API Key required |
+| **Ollama**         | localhost         | 11434         | /v1       | OpenAI          | None             |
+| **LM Studio**      | localhost         | 1234          | /v1       | OpenAI          | None             |
+| **Open Responses** | (you specify)     | (you specify) | /v1       | Open Responses  | Optional         |
+| **Custom**         | (you specify)     | (you specify) | /v1       | OpenAI          | Optional         |
+
+### API Format Types
+
+| Format | Endpoint | Description |
+|--------|----------|-------------|
+| **OpenAI** | `/chat/completions` | OpenAI Chat Completions API format |
+| **Anthropic** | `/messages` | Anthropic Messages API format |
+| **Open Responses** | `/responses` | [Open Responses](https://www.openresponses.org) API format |
 
 ---
 
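Because the formats differ mainly in endpoint and payload shape, a client can dispatch on the provider's configured API format. An illustrative sketch (the OpenAI and Open Responses shapes follow the docs above; the Anthropic shape assumes its documented `messages` plus required `max_tokens` form):

```python
def build_request(api_format: str, model: str, prompt: str) -> tuple:
    """Return (path, payload) for a provider API format (sketch)."""
    if api_format == "openai":
        return "/chat/completions", {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
        }
    if api_format == "anthropic":
        return "/messages", {
            "model": model,
            "max_tokens": 1024,
            "messages": [{"role": "user", "content": prompt}],
        }
    if api_format == "open_responses":
        return "/responses", {"model": model, "input": prompt}
    raise ValueError(f"unknown API format: {api_format}")
```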
From da015d0f31a93706ac52bb85c31d64b195fdc2cc Mon Sep 17 00:00:00 2001
From: Terence Pae
Date: Mon, 19 Jan 2026 11:07:22 -0800
Subject: [PATCH 2/2] updated docs

---
 README.md                | 2 +-
 docs/OpenAI_API_GUIDE.md | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 1511b87b..1bab29a6 100644
--- a/README.md
+++ b/README.md
@@ -361,7 +361,7 @@ Base URL: `http://127.0.0.1:1337` (or your configured port)
 | `GET /v1/tags`              | List models (Ollama format)         |
 | `POST /v1/chat/completions` | Chat completions (OpenAI format)    |
 | `POST /messages`            | Chat completions (Anthropic format) |
-| `POST /responses`           | Responses (Open Responses format)   |
+| `POST /v1/responses`        | Responses (Open Responses format)   |
 | `POST /chat`                | Chat (Ollama format, NDJSON)        |
 
 All endpoints support `/v1`, `/api`, and `/v1/api` prefixes.
diff --git a/docs/OpenAI_API_GUIDE.md b/docs/OpenAI_API_GUIDE.md
index e47bf46e..69260d4f 100644
--- a/docs/OpenAI_API_GUIDE.md
+++ b/docs/OpenAI_API_GUIDE.md
@@ -333,14 +333,14 @@ for chunk in stream:
 
 Osaurus supports the [Open Responses](https://www.openresponses.org) specification, providing a semantic, item-based API format for multi-provider interoperability.
 
-### 3. Responses - `POST /responses`
+### 3. Responses - `POST /responses` (also available at `POST /v1/responses`)
 
 Generate responses using the Open Responses format.
 
 #### Non-streaming Request
 
 ```bash
-curl http://127.0.0.1:1337/responses \
+curl http://127.0.0.1:1337/v1/responses \
   -H "Content-Type: application/json" \
   -d '{
     "model": "llama-3.2-3b-instruct",
@@ -383,7 +383,7 @@ Example response:
 #### Streaming Request
 
 ```bash
-curl http://127.0.0.1:1337/responses \
+curl http://127.0.0.1:1337/v1/responses \
   -H "Content-Type: application/json" \
   -d '{
     "model": "llama-3.2-3b-instruct",
@@ -427,7 +427,7 @@ data: [DONE]
 For multi-turn conversations, use structured input items:
 
 ```bash
-curl http://127.0.0.1:1337/responses \
+curl http://127.0.0.1:1337/v1/responses \
   -H "Content-Type: application/json" \
   -d '{
     "model": "llama-3.2-3b-instruct",
@@ -442,7 +442,7 @@ curl http://127.0.0.1:1337/responses \
 #### Tool Calling with Open Responses
 
 ```bash
-curl http://127.0.0.1:1337/responses \
+curl http://127.0.0.1:1337/v1/responses \
   -H "Content-Type: application/json" \
   -d '{
     "model": "llama-3.2-3b-instruct",
@@ -485,7 +485,7 @@ Tool call response:
 To continue after a tool call, include the function output:
 
 ```bash
-curl http://127.0.0.1:1337/responses \
+curl http://127.0.0.1:1337/v1/responses \
   -H "Content-Type: application/json" \
   -d '{
     "model": "llama-3.2-3b-instruct",