Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
All notable changes to `@stackbilt/llm-providers` are documented here.
Format follows [Keep a Changelog](https://keepachangelog.com/). Versions use [Semantic Versioning](https://semver.org/).

## [1.5.1] — 2026-04-27

### Fixed
- **`analyzeImage()` silent empty response on Cloudflare** — `@cf/meta/llama-3.2-11b-vision-instruct` via the Workers AI binding requires a raw `{ image: number[], prompt, max_tokens }` input shape, not the OpenAI-compatible `messages/image_url` format. The chat path returns `choices[0].message.content === null` via the binding, causing `extractText()` to silently return `""`. The provider now detects this model and dispatches to the raw binding format, mapping the result's `{ response: string }` back through the existing normalisation path. Other vision models (`@cf/google/gemma-4-26b-a4b-it`, `@cf/meta/llama-4-scout-17b-16e-instruct`) continue using the chat format unchanged. Fixes #53.

## [1.5.0] — 2026-04-23

Bundles the unreleased 1.4.0 scope (model retirements, drift test) with envelope validation, env auto-discovery, and the declarative catalog into a single minor release. 1.4.0 was tagged in `package.json` but never published to npm; consumers upgrading from 1.3.0 receive all of the following.
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@stackbilt/llm-providers",
"version": "1.5.0",
"version": "1.5.1",
"description": "Multi-LLM failover with circuit breakers, cost tracking, and intelligent retry. Cloudflare Workers native.",
"author": "Stackbilt <admin@stackbilt.dev>",
"license": "Apache-2.0",
Expand Down
107 changes: 102 additions & 5 deletions src/__tests__/cloudflare.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -348,20 +348,117 @@ describe('CloudflareProvider', () => {
expect(content.filter((p: { type: string }) => p.type === 'image_url')).toHaveLength(2);
});

it('accepts pre-formed data: URLs via image.url', async () => {
mockAiRun.mockResolvedValueOnce({
choices: [{ message: { content: 'ok' }, finish_reason: 'stop' }]
it('uses raw binding format for llama-3.2-11b-vision-instruct (fixes silent empty response)', async () => {
  mockAiRun.mockResolvedValueOnce({ response: 'A delicious pasta dish.' });

  const result = await provider.generateResponse({
    model: '@cf/meta/llama-3.2-11b-vision-instruct',
    messages: [{ role: 'user', content: 'Describe this food image.' }],
    images: [{ data: 'QUJD', mimeType: 'image/jpeg' }],
    maxTokens: 512
  });

  // The binding must receive the raw { image, prompt, max_tokens } shape,
  // not the OpenAI-compatible chat format.
  const [calledModel, calledBody] = mockAiRun.mock.calls[0];
  expect(calledModel).toBe('@cf/meta/llama-3.2-11b-vision-instruct');
  expect(Array.isArray(calledBody.image)).toBe(true);
  expect(calledBody.image).toHaveLength(3); // base64 'QUJD' decodes to [65, 66, 67]
  expect(calledBody.prompt).toBe('Describe this food image.');
  expect(calledBody.max_tokens).toBe(512);
  expect(calledBody.messages).toBeUndefined();
  // The raw { response } result is normalised back onto the standard envelope.
  expect(result.content).toBe('A delicious pasta dish.');
  expect(result.message).toBe('A delicious pasta dish.');
});

it('prepends system prompt to raw binding prompt for llama-3.2', async () => {
  mockAiRun.mockResolvedValueOnce({ response: 'Pasta.' });

  await provider.generateResponse({
    model: '@cf/meta/llama-3.2-11b-vision-instruct',
    messages: [{ role: 'user', content: 'What is this?' }],
    images: [{ data: 'QUJD', mimeType: 'image/jpeg' }],
    systemPrompt: 'You are a food critic.',
  });

  // The system prompt is joined to the user text with a blank line.
  const requestBody = mockAiRun.mock.calls[0][1];
  expect(requestBody.prompt).toBe('You are a food critic.\n\nWhat is this?');
});

it('rejects multiple images on llama-3.2 raw binding with a clear error', async () => {
  // The raw binding shape accepts exactly one image; two must be rejected.
  const pending = provider.generateResponse({
    model: '@cf/meta/llama-3.2-11b-vision-instruct',
    messages: [{ role: 'user', content: 'compare' }],
    images: [
      { data: 'QUJD', mimeType: 'image/jpeg' },
      { data: 'REVG', mimeType: 'image/jpeg' }
    ]
  });

  await expect(pending).rejects.toThrow(/supports exactly one image/);
});

it('extracts text from array-content user message for llama-3.2 raw binding', async () => {
  mockAiRun.mockResolvedValueOnce({ response: 'Spaghetti.' });

  // Multi-part user content (cast past the string-only type to simulate
  // callers that pass OpenAI-style content arrays).
  const contentParts = [
    { type: 'text', text: 'What food is this?' },
    { type: 'text', text: 'Be brief.' }
  ];
  await provider.generateResponse({
    model: '@cf/meta/llama-3.2-11b-vision-instruct',
    messages: [{ role: 'user', content: contentParts as unknown as string }],
    images: [{ data: 'QUJD', mimeType: 'image/jpeg' }]
  });

  // Text parts are concatenated with single spaces into the flat prompt.
  const requestBody = mockAiRun.mock.calls[0][1];
  expect(requestBody.prompt).toBe('What food is this? Be brief.');
});

it('defaults max_tokens to 512 when not specified for llama-3.2 raw binding', async () => {
  mockAiRun.mockResolvedValueOnce({ response: 'ok' });

  await provider.generateResponse({
    model: '@cf/meta/llama-3.2-11b-vision-instruct',
    messages: [{ role: 'user', content: 'x' }],
    images: [{ data: 'QUJD', mimeType: 'image/jpeg' }]
  });

  // No maxTokens in the request → the raw-binding default of 512 applies.
  expect(mockAiRun.mock.calls[0][1].max_tokens).toBe(512);
});

it('accepts pre-formed data: URL for llama-3.2 raw binding', async () => {
  mockAiRun.mockResolvedValueOnce({ response: 'ok' });

  await provider.generateResponse({
    model: '@cf/meta/llama-3.2-11b-vision-instruct',
    messages: [{ role: 'user', content: 'x' }],
    images: [{ url: 'data:image/webp;base64,ZEFUQQ==' }]
  });

  // BUG FIX: the previous version read body.messages[0].content[1] and then
  // asserted body.messages was undefined — on the raw-binding path messages
  // IS undefined, so the dereference threw a TypeError before any assertion
  // ran (stale lines from the replaced chat-format test). Assert on the raw
  // shape instead: the data: URL payload is decoded to a byte array.
  const [, body] = mockAiRun.mock.calls[0];
  expect(Array.isArray(body.image)).toBe(true);
  expect(body.image).toEqual([100, 65, 84, 65]); // 'ZEFUQQ==' decodes to 'dATA'
  expect(body.messages).toBeUndefined();
});

it('other vision models (gemma-4, llama-4-scout) still use chat/image_url format', async () => {
  mockAiRun.mockResolvedValueOnce({
    choices: [{ message: { content: 'A tomato.' }, finish_reason: 'stop' }]
  });

  await provider.generateResponse({
    model: '@cf/google/gemma-4-26b-a4b-it',
    messages: [{ role: 'user', content: 'What is in this image?' }],
    images: [{ data: 'QUJD', mimeType: 'image/png' }],
    maxTokens: 256
  });

  // Non-llama-3.2 vision models keep the OpenAI-compatible chat shape:
  // messages present, no raw image array, image attached to the last message.
  const requestBody = mockAiRun.mock.calls[0][1];
  expect(requestBody.messages).toBeDefined();
  expect(requestBody.image).toBeUndefined();
  const lastMessage = requestBody.messages[requestBody.messages.length - 1];
  expect(lastMessage.content[1].image_url.url).toBe('data:image/png;base64,QUJD');
});

it('rejects HTTP image URLs (requires base64 bytes)', async () => {
Expand Down
66 changes: 64 additions & 2 deletions src/providers/cloudflare.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ interface WorkersAIResult {
result?: WorkersAIResult; // wrapped responses
}

// Models that must be invoked with the raw { image, prompt } binding input
// rather than the chat/image_url shape. Through the Workers AI binding the
// chat path yields choices[0].message.content === null for these models,
// which extractText() turns into a silent "". Extend this set if another
// CF vision model exhibits the same symptom.
const LLAMA_VISION_RAW_MODELS = new Set<string>(['@cf/meta/llama-3.2-11b-vision-instruct']);

export class CloudflareProvider extends BaseProvider {
name = 'cloudflare';
models = [
Expand Down Expand Up @@ -134,13 +142,19 @@ export class CloudflareProvider extends BaseProvider {
try {
const response = await this.executeWithResiliency(async () => {
const model = request.model || this.getRecommendedModel(request);
const cloudflareRequest = this.formatRequest(request, model);

// Validate model is supported
if (!this.models.includes(model)) {
throw new ModelNotFoundError('cloudflare', model);
}

// llama-3.2-11b vision requires the raw Workers AI binding format.
// The chat/image_url path returns null content via the binding.
if (LLAMA_VISION_RAW_MODELS.has(model) && (request.images?.length ?? 0) > 0) {
const result = await this.runLlamaVisionRaw(request, model);
return this.formatResponse(result as WorkersAIResult, model, request, Date.now() - startTime);
}

const cloudflareRequest = this.formatRequest(request, model);
// Workers AI binding uses branded model names; cast at API boundary
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Ai.run() requires branded model types
const result = await (this.ai as { run(model: string, input: unknown): Promise<unknown> }).run(model, cloudflareRequest);
Expand Down Expand Up @@ -358,6 +372,54 @@ export class CloudflareProvider extends BaseProvider {
};
}

private async runLlamaVisionRaw(request: LLMRequest, model: string): Promise<WorkersAIResult> {
if (request.images!.length > 1) {
throw new ConfigurationError(
this.name,
`${model} supports exactly one image via the raw binding format — ${request.images!.length} were provided.`
);
}

const image = request.images![0];

let imageBytes: number[];
if (image.data) {
imageBytes = Array.from(Uint8Array.from(atob(image.data), c => c.charCodeAt(0)));
} else if (image.url?.startsWith('data:')) {
const b64 = image.url.split(',')[1] ?? '';
imageBytes = Array.from(Uint8Array.from(atob(b64), c => c.charCodeAt(0)));
} else {
throw new ConfigurationError(
this.name,
`${model} requires base64 image data or a data: URL — HTTP URLs are not supported.`
);
}

const systemPrefix = request.systemPrompt ? `${request.systemPrompt}\n\n` : '';
let lastUserText = '';
for (let i = request.messages.length - 1; i >= 0; i--) {
if (request.messages[i].role === 'user') {
const raw = request.messages[i].content;
lastUserText = typeof raw === 'string'
? raw
: Array.isArray(raw)
? (raw as Array<{ type?: string; text?: string }>)
.filter(p => p.type === 'text')
.map(p => p.text ?? '')
.join(' ')
: '';
break;
}
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Ai.run() requires branded model types
return (this.ai as { run(model: string, input: unknown): Promise<unknown> }).run(model, {
image: imageBytes,
prompt: `${systemPrefix}${lastUserText}`,
max_tokens: request.maxTokens ?? 512
}) as Promise<WorkersAIResult>;
}

private formatRequest(request: LLMRequest, model: string): CloudflareRequest {
const capabilities = this.getModelCapabilities()[model];
const usesTools =
Expand Down