From 8e6b7c93c0db8e596f3a8e49562203ca58961b8d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 09:55:39 +0000 Subject: [PATCH 1/5] Initial plan From 09429d84195c5577cadbe0414c9fd7b80818e3ec Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 09:59:26 +0000 Subject: [PATCH 2/5] Fix cursor disappearing and add Ollama streaming support - Remove process.stdin.setRawMode(true) to fix cursor disappearing - Remove manual cursor manipulation with ANSI codes - Implement Ollama completionStream using streamingGenerate API - Add Ollama streaming handling in runCompletion method Co-authored-by: joone <1979160+joone@users.noreply.github.com> --- src/llm/index.ts | 37 +++++++++++++++++++++++++++++++++++-- src/loz.ts | 13 +++++++++++-- src/prompt/index.ts | 24 +----------------------- 3 files changed, 47 insertions(+), 27 deletions(-) diff --git a/src/llm/index.ts b/src/llm/index.ts index 5fe0307..40f3919 100644 --- a/src/llm/index.ts +++ b/src/llm/index.ts @@ -111,8 +111,41 @@ export class OllamaAPI extends LLMService { } public async completionStream(params: LLMSettings): Promise { - // eslint-disable-next-line @typescript-eslint/no-unused-vars - return {}; + if (DEBUG) { + console.log("Ollama stream completion"); + console.log("Model: " + params.model); + } + await this.api.setModel(params.model); + + // Return a promise that resolves with a stream-like object + return new Promise((resolve, reject) => { + const chunks: string[] = []; + + this.api.streamingGenerate( + params.prompt, + // responseOutput callback - called for each chunk + (chunk: string) => { + chunks.push(chunk); + }, + // contextOutput callback + null, + // fullResponseOutput callback + null, + // statsOutput callback + null + ).then(() => { + // When streaming is complete, resolve with an async iterator + resolve({ + [Symbol.asyncIterator]: async function* () { + 
for (const chunk of chunks) { + yield { response: chunk }; + } + } + }); + }).catch((error: any) => { + reject(error); + }); + }); } } diff --git a/src/loz.ts b/src/loz.ts index f20bfe5..2c3b291 100644 --- a/src/loz.ts +++ b/src/loz.ts @@ -272,7 +272,7 @@ export class Loz { let curCompleteText = ""; const api = this.checkAPI(); - if (api === "openai" || api === "github-copilot") { + if (api === "openai" || api === "github-copilot" || api === "ollama") { let stream: any; try { stream = await this.llmAPI.completionStream(params); @@ -347,6 +347,15 @@ export class Loz { } } + process.stdout.write("\n"); + } else if (api === "ollama") { + // Handle Ollama streaming + for await (const data of stream) { + if (data === null) return; + const streamData = data.response || ""; + curCompleteText += streamData; + process.stdout.write(streamData); + } process.stdout.write("\n"); } else { // OpenAI streaming @@ -395,7 +404,7 @@ export class Loz { } cli.prompt(); }); - cli.start(true); + cli.start(); }); } diff --git a/src/prompt/index.ts b/src/prompt/index.ts index edb9dd3..67d81e6 100644 --- a/src/prompt/index.ts +++ b/src/prompt/index.ts @@ -7,7 +7,6 @@ interface PromptInterface { export class CommandLinePrompt implements PromptInterface { private rl: readline.Interface; private callback: (input: string) => Promise; - private timer: any; constructor(callback: (input: string) => Promise) { // ... 
@@ -22,26 +21,13 @@ export class CommandLinePrompt implements PromptInterface { this.rl.prompt(); } - public async start(blinking?: boolean): Promise { + public async start(): Promise { // Set the prompt to display before each input this.rl.setPrompt("> "); // Show the cursor and prompt the user for input this.rl.prompt(); - // Set the terminal to raw mode to allow for cursor manipulation - process.stdin.setRawMode(true); - - // Display a blinking cursor - if (blinking) { - this.timer = setInterval(() => { - process.stdout.write("\x1B[?25h"); - setTimeout(() => { - process.stdout.write("\x1B[?25l"); - }, 500); - }, 1000); - } - // Listen for user input this.rl.on("line", async (input) => { this.rl.prompt(); @@ -50,19 +36,11 @@ export class CommandLinePrompt implements PromptInterface { // Handle CTRL+C to exit the program this.rl.on("SIGINT", () => { - clearInterval(this.timer); this.rl.close(); }); } public exit(): void { - clearInterval(this.timer); - // Show the cursor - process.stdout.write("\x1B[?25h"); - // Try to enable cursor blinking - process.stdout.write("\x1B[?12h"); - // Reset the terminal to the normal mode - process.stdin.setRawMode(false); this.rl.close(); } } From aeadc7eb766ffe2b428e0a610b5358cc4ff3a611 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 10:00:33 +0000 Subject: [PATCH 3/5] Improve Ollama streaming to yield chunks as they arrive - Refactor completionStream to use async iterator pattern - Yield chunks immediately as they arrive from callback - Use queue and promise-based approach for true streaming - Addresses code review feedback Co-authored-by: joone <1979160+joone@users.noreply.github.com> --- src/llm/index.ts | 94 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 27 deletions(-) diff --git a/src/llm/index.ts b/src/llm/index.ts index 40f3919..1168476 100644 --- a/src/llm/index.ts +++ b/src/llm/index.ts @@ -117,35 +117,75 @@ 
export class OllamaAPI extends LLMService { } await this.api.setModel(params.model); - // Return a promise that resolves with a stream-like object - return new Promise((resolve, reject) => { - const chunks: string[] = []; - - this.api.streamingGenerate( - params.prompt, - // responseOutput callback - called for each chunk - (chunk: string) => { + // Create a queue to handle chunks as they arrive + const chunks: string[] = []; + let resolveNext: ((value: IteratorResult<{ response: string }>) => void) | null = null; + let isComplete = false; + let error: any = null; + + // Start streaming immediately + this.api.streamingGenerate( + params.prompt, + // responseOutput callback - called for each chunk as it arrives + (chunk: string) => { + if (resolveNext) { + // If someone is waiting for a chunk, give it to them immediately + resolveNext({ value: { response: chunk }, done: false }); + resolveNext = null; + } else { + // Otherwise, queue it for later chunks.push(chunk); - }, - // contextOutput callback - null, - // fullResponseOutput callback - null, - // statsOutput callback - null - ).then(() => { - // When streaming is complete, resolve with an async iterator - resolve({ - [Symbol.asyncIterator]: async function* () { - for (const chunk of chunks) { - yield { response: chunk }; - } - } - }); - }).catch((error: any) => { - reject(error); - }); + } + }, + // contextOutput callback + null, + // fullResponseOutput callback + null, + // statsOutput callback + null + ).then(() => { + isComplete = true; + // If someone is waiting, notify them we're done + if (resolveNext) { + resolveNext({ value: undefined, done: true }); + resolveNext = null; + } + }).catch((err: any) => { + error = err; + isComplete = true; + if (resolveNext) { + resolveNext({ value: undefined, done: true }); + resolveNext = null; + } }); + + // Return an async iterator that yields chunks as they arrive + return { + [Symbol.asyncIterator]: () => ({ + next: (): Promise<IteratorResult<{ response: string }>> => { + // If there's an error, throw 
it + if (error) { + throw error; + } + + // If we have queued chunks, return one immediately + if (chunks.length > 0) { + const chunk = chunks.shift()!; + return Promise.resolve({ value: { response: chunk }, done: false }); + } + + // If streaming is complete, we're done + if (isComplete) { + return Promise.resolve({ value: undefined, done: true }); + } + + // Otherwise, wait for the next chunk + return new Promise((resolve) => { + resolveNext = resolve; + }); + } + }) + }; } } From 9aff707e89ce90482dd506c8aebb499491601d2f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 10:01:49 +0000 Subject: [PATCH 4/5] Fix race conditions and improve error handling in streaming - Queue chunks before resolving to prevent race conditions - Clear chunk queue on error to avoid stale data - Use break instead of return to ensure newline is written - Improve consistency in null handling across streaming implementations Co-authored-by: joone <1979160+joone@users.noreply.github.com> --- src/llm/index.ts | 14 ++++++++------ src/loz.ts | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/llm/index.ts b/src/llm/index.ts index 1168476..78dc3c5 100644 --- a/src/llm/index.ts +++ b/src/llm/index.ts @@ -128,13 +128,13 @@ export class OllamaAPI extends LLMService { params.prompt, // responseOutput callback - called for each chunk as it arrives (chunk: string) => { + // Queue the chunk first, then resolve if someone is waiting + chunks.push(chunk); if (resolveNext) { - // If someone is waiting for a chunk, give it to them immediately - resolveNext({ value: { response: chunk }, done: false }); + // Someone is waiting - give them the chunk immediately + const nextChunk = chunks.shift()!; + resolveNext({ value: { response: nextChunk }, done: false }); resolveNext = null; - } else { - // Otherwise, queue it for later - chunks.push(chunk); } }, // contextOutput callback @@ -153,6 +153,8 @@ export class 
OllamaAPI extends LLMService { }).catch((err: any) => { error = err; isComplete = true; + // Clear the queue on error to avoid returning stale chunks + chunks.length = 0; if (resolveNext) { resolveNext({ value: undefined, done: true }); resolveNext = null; @@ -163,7 +165,7 @@ export class OllamaAPI extends LLMService { return { [Symbol.asyncIterator]: () => ({ next: (): Promise<IteratorResult<{ response: string }>> => { - // If there's an error, throw it + // If there's an error, throw it immediately (no chunks should be returned after error) if (error) { throw error; } diff --git a/src/loz.ts b/src/loz.ts index 2c3b291..ed03ee0 100644 --- a/src/loz.ts +++ b/src/loz.ts @@ -351,7 +351,7 @@ export class Loz { } else if (api === "ollama") { // Handle Ollama streaming for await (const data of stream) { - if (data === null) return; + if (data === null) break; // Break instead of return to ensure newline is written const streamData = data.response || ""; curCompleteText += streamData; process.stdout.write(streamData); @@ -360,7 +360,7 @@ export class Loz { } else { // OpenAI streaming for await (const data of stream) { - if (data === null) return; + if (data === null) break; // Break instead of return to ensure newline is written const streamData = data.choices[0]?.delta?.content || ""; curCompleteText += streamData; process.stdout.write(streamData); From ecd52f42bf5d069db788fe74a565e730e33da591 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 10:02:45 +0000 Subject: [PATCH 5/5] Improve code clarity with better variable naming - Rename resolveNext to pendingResolve for clarity - Use chunks.splice(0) instead of chunks.length = 0 - Address code review style suggestions Co-authored-by: joone <1979160+joone@users.noreply.github.com> --- src/llm/index.ts | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/llm/index.ts b/src/llm/index.ts index 78dc3c5..2a3fbc4 100644 --- a/src/llm/index.ts +++ 
b/src/llm/index.ts @@ -119,7 +119,7 @@ export class OllamaAPI extends LLMService { // Create a queue to handle chunks as they arrive const chunks: string[] = []; - let resolveNext: ((value: IteratorResult<{ response: string }>) => void) | null = null; + let pendingResolve: ((value: IteratorResult<{ response: string }>) => void) | null = null; let isComplete = false; let error: any = null; @@ -130,11 +130,11 @@ export class OllamaAPI extends LLMService { (chunk: string) => { // Queue the chunk first, then resolve if someone is waiting chunks.push(chunk); - if (resolveNext) { + if (pendingResolve) { // Someone is waiting - give them the chunk immediately const nextChunk = chunks.shift()!; - resolveNext({ value: { response: nextChunk }, done: false }); - resolveNext = null; + pendingResolve({ value: { response: nextChunk }, done: false }); + pendingResolve = null; } }, // contextOutput callback @@ -146,18 +146,18 @@ export class OllamaAPI extends LLMService { ).then(() => { isComplete = true; // If someone is waiting, notify them we're done - if (resolveNext) { - resolveNext({ value: undefined, done: true }); - resolveNext = null; + if (pendingResolve) { + pendingResolve({ value: undefined, done: true }); + pendingResolve = null; } }).catch((err: any) => { error = err; isComplete = true; // Clear the queue on error to avoid returning stale chunks - chunks.length = 0; - if (resolveNext) { - resolveNext({ value: undefined, done: true }); - resolveNext = null; + chunks.splice(0); + if (pendingResolve) { + pendingResolve({ value: undefined, done: true }); + pendingResolve = null; } }); @@ -183,7 +183,7 @@ export class OllamaAPI extends LLMService { // Otherwise, wait for the next chunk return new Promise((resolve) => { - resolveNext = resolve; + pendingResolve = resolve; }); } })