From 8e6b7c93c0db8e596f3a8e49562203ca58961b8d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Feb 2026 09:55:39 +0000
Subject: [PATCH 1/5] Initial plan


From 09429d84195c5577cadbe0414c9fd7b80818e3ec Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Feb 2026 09:59:26 +0000
Subject: [PATCH 2/5] Fix cursor disappearing and add Ollama streaming support

- Remove process.stdin.setRawMode(true) to fix cursor disappearing
- Remove manual cursor manipulation with ANSI codes
- Implement Ollama completionStream using streamingGenerate API (chunks are buffered and yielded once generation completes)
- Add Ollama streaming handling in runCompletion method

Co-authored-by: joone <1979160+joone@users.noreply.github.com>
---
 src/llm/index.ts    | 37 +++++++++++++++++++++++++++++++++++--
 src/loz.ts          | 13 +++++++++++--
 src/prompt/index.ts | 24 +-----------------------
 3 files changed, 47 insertions(+), 27 deletions(-)
diff --git a/src/llm/index.ts b/src/llm/index.ts
index 5fe0307..40f3919 100644
--- a/src/llm/index.ts
+++ b/src/llm/index.ts
@@ -111,8 +111,41 @@ export class OllamaAPI extends LLMService {
   }
 
   public async completionStream(params: LLMSettings): Promise<any> {
-    // eslint-disable-next-line @typescript-eslint/no-unused-vars
-    return {};
+    if (DEBUG) {
+      console.log("Ollama stream completion");
+      console.log("Model: " + params.model);
+    }
+    await this.api.setModel(params.model);
+    
+    // Return a promise that resolves with a stream-like object
+    return new Promise((resolve, reject) => {
+      const chunks: string[] = [];
+      
+      this.api.streamingGenerate(
+        params.prompt,
+        // responseOutput callback - called for each chunk
+        (chunk: string) => {
+          chunks.push(chunk);
+        },
+        // contextOutput callback
+        null,
+        // fullResponseOutput callback
+        null,
+        // statsOutput callback
+        null
+      ).then(() => {
+        // When streaming is complete, resolve with an async iterator
+        resolve({
+          [Symbol.asyncIterator]: async function* () {
+            for (const chunk of chunks) {
+              yield { response: chunk };
+            }
+          }
+        });
+      }).catch((error: any) => {
+        reject(error);
+      });
+    });
   }
 }
 
diff --git a/src/loz.ts b/src/loz.ts
index f20bfe5..2c3b291 100644
--- a/src/loz.ts
+++ b/src/loz.ts
@@ -272,7 +272,7 @@ export class Loz {
     let curCompleteText = "";
     const api = this.checkAPI();
     
-    if (api === "openai" || api === "github-copilot") {
+    if (api === "openai" || api === "github-copilot" || api === "ollama") {
       let stream: any;
       try {
         stream = await this.llmAPI.completionStream(params);
@@ -347,6 +347,15 @@ export class Loz {
             }
           }
           
+          process.stdout.write("\n");
+        } else if (api === "ollama") {
+          // Handle Ollama streaming
+          for await (const data of stream) {
+            if (data === null) return;
+            const streamData = data.response || "";
+            curCompleteText += streamData;
+            process.stdout.write(streamData);
+          }
           process.stdout.write("\n");
         } else {
           // OpenAI streaming
@@ -395,7 +404,7 @@ export class Loz {
         }
         cli.prompt();
       });
-      cli.start(true);
+      cli.start();
     });
   }
 
diff --git a/src/prompt/index.ts b/src/prompt/index.ts
index edb9dd3..67d81e6 100644
--- a/src/prompt/index.ts
+++ b/src/prompt/index.ts
@@ -7,7 +7,6 @@ interface PromptInterface {
 export class CommandLinePrompt implements PromptInterface {
   private rl: readline.Interface;
   private callback: (input: string) => Promise<void>;
-  private timer: any;
 
   constructor(callback: (input: string) => Promise<void>) {
     // ...
@@ -22,26 +21,13 @@ export class CommandLinePrompt implements PromptInterface {
     this.rl.prompt();
   }
 
-  public async start(blinking?: boolean): Promise<void> {
+  public async start(): Promise<void> {
     // Set the prompt to display before each input
     this.rl.setPrompt("> ");
 
     // Show the cursor and prompt the user for input
     this.rl.prompt();
 
-    // Set the terminal to raw mode to allow for cursor manipulation
-    process.stdin.setRawMode(true);
-
-    // Display a blinking cursor
-    if (blinking) {
-      this.timer = setInterval(() => {
-        process.stdout.write("\x1B[?25h");
-        setTimeout(() => {
-          process.stdout.write("\x1B[?25l");
-        }, 500);
-      }, 1000);
-    }
-
     // Listen for user input
     this.rl.on("line", async (input) => {
       this.rl.prompt();
@@ -50,19 +36,11 @@ export class CommandLinePrompt implements PromptInterface {
 
     // Handle CTRL+C to exit the program
     this.rl.on("SIGINT", () => {
-      clearInterval(this.timer);
       this.rl.close();
     });
   }
 
   public exit(): void {
-    clearInterval(this.timer);
-    // Show the cursor
-    process.stdout.write("\x1B[?25h");
-    // Try to enable cursor blinking
-    process.stdout.write("\x1B[?12h");
-    // Reset the terminal to the normal mode
-    process.stdin.setRawMode(false);
     this.rl.close();
   }
 }

From aeadc7eb766ffe2b428e0a610b5358cc4ff3a611 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Feb 2026 10:00:33 +0000
Subject: [PATCH 3/5] Improve Ollama streaming to yield chunks as they arrive

- Refactor completionStream to use async iterator pattern
- Yield chunks immediately as they arrive from callback
- Use queue and promise-based approach for true streaming
- Address code review feedback

Co-authored-by: joone <1979160+joone@users.noreply.github.com>
---
 src/llm/index.ts | 94 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 67 insertions(+), 27 deletions(-)

diff --git a/src/llm/index.ts b/src/llm/index.ts
index 40f3919..1168476 100644
--- a/src/llm/index.ts
+++ b/src/llm/index.ts
@@ -117,35 +117,75 @@ export class OllamaAPI extends LLMService {
     }
     await this.api.setModel(params.model);
     
-    // Return a promise that resolves with a stream-like object
-    return new Promise((resolve, reject) => {
-      const chunks: string[] = [];
-      
-      this.api.streamingGenerate(
-        params.prompt,
-        // responseOutput callback - called for each chunk
-        (chunk: string) => {
+    // Create a queue to handle chunks as they arrive
+    const chunks: string[] = [];
+    let resolveNext: ((value: IteratorResult<{ response: string }>) => void) | null = null;
+    let isComplete = false;
+    let error: any = null;
+    
+    // Start streaming immediately
+    this.api.streamingGenerate(
+      params.prompt,
+      // responseOutput callback - called for each chunk as it arrives
+      (chunk: string) => {
+        if (resolveNext) {
+          // If someone is waiting for a chunk, give it to them immediately
+          resolveNext({ value: { response: chunk }, done: false });
+          resolveNext = null;
+        } else {
+          // Otherwise, queue it for later
           chunks.push(chunk);
-        },
-        // contextOutput callback
-        null,
-        // fullResponseOutput callback
-        null,
-        // statsOutput callback
-        null
-      ).then(() => {
-        // When streaming is complete, resolve with an async iterator
-        resolve({
-          [Symbol.asyncIterator]: async function* () {
-            for (const chunk of chunks) {
-              yield { response: chunk };
-            }
-          }
-        });
-      }).catch((error: any) => {
-        reject(error);
-      });
+        }
+      },
+      // contextOutput callback
+      null,
+      // fullResponseOutput callback
+      null,
+      // statsOutput callback
+      null
+    ).then(() => {
+      isComplete = true;
+      // If someone is waiting, notify them we're done
+      if (resolveNext) {
+        resolveNext({ value: undefined, done: true });
+        resolveNext = null;
+      }
+    }).catch((err: any) => {
+      error = err;
+      isComplete = true;
+      if (resolveNext) {
+        resolveNext({ value: undefined, done: true });
+        resolveNext = null;
+      }
     });
+    
+    // Return an async iterator that yields chunks as they arrive
+    return {
+      [Symbol.asyncIterator]: () => ({
+        next: (): Promise<IteratorResult<{ response: string }>> => {
+          // If there's an error, throw it
+          if (error) {
+            throw error;
+          }
+          
+          // If we have queued chunks, return one immediately
+          if (chunks.length > 0) {
+            const chunk = chunks.shift()!;
+            return Promise.resolve({ value: { response: chunk }, done: false });
+          }
+          
+          // If streaming is complete, we're done
+          if (isComplete) {
+            return Promise.resolve({ value: undefined, done: true });
+          }
+          
+          // Otherwise, wait for the next chunk
+          return new Promise((resolve) => {
+            resolveNext = resolve;
+          });
+        }
+      })
+    };
   }
 }
 

From 9aff707e89ce90482dd506c8aebb499491601d2f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Feb 2026 10:01:49 +0000
Subject: [PATCH 4/5] Fix race conditions and improve error handling in
 streaming

- Queue chunks before resolving to prevent race conditions
- Clear chunk queue on error to avoid stale data
- Use break instead of return to ensure newline is written
- Improve consistency in null handling across streaming implementations

Co-authored-by: joone <1979160+joone@users.noreply.github.com>
---
 src/llm/index.ts | 14 ++++++++------
 src/loz.ts       |  4 ++--
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/llm/index.ts b/src/llm/index.ts
index 1168476..78dc3c5 100644
--- a/src/llm/index.ts
+++ b/src/llm/index.ts
@@ -128,13 +128,13 @@ export class OllamaAPI extends LLMService {
       params.prompt,
       // responseOutput callback - called for each chunk as it arrives
       (chunk: string) => {
+        // Queue the chunk first, then resolve if someone is waiting
+        chunks.push(chunk);
         if (resolveNext) {
-          // If someone is waiting for a chunk, give it to them immediately
-          resolveNext({ value: { response: chunk }, done: false });
+          // Someone is waiting - give them the chunk immediately
+          const nextChunk = chunks.shift()!;
+          resolveNext({ value: { response: nextChunk }, done: false });
           resolveNext = null;
-        } else {
-          // Otherwise, queue it for later
-          chunks.push(chunk);
         }
       },
       // contextOutput callback
@@ -153,6 +153,8 @@ export class OllamaAPI extends LLMService {
     }).catch((err: any) => {
       error = err;
       isComplete = true;
+      // Clear the queue on error to avoid returning stale chunks
+      chunks.length = 0;
       if (resolveNext) {
         resolveNext({ value: undefined, done: true });
         resolveNext = null;
@@ -163,7 +165,7 @@ export class OllamaAPI extends LLMService {
     return {
       [Symbol.asyncIterator]: () => ({
         next: (): Promise<IteratorResult<{ response: string }>> => {
-          // If there's an error, throw it
+          // If there's an error, throw it immediately (no chunks should be returned after error)
           if (error) {
             throw error;
           }
diff --git a/src/loz.ts b/src/loz.ts
index 2c3b291..ed03ee0 100644
--- a/src/loz.ts
+++ b/src/loz.ts
@@ -351,7 +351,7 @@ export class Loz {
         } else if (api === "ollama") {
           // Handle Ollama streaming
           for await (const data of stream) {
-            if (data === null) return;
+            if (data === null) break; // Break instead of return to ensure newline is written
             const streamData = data.response || "";
             curCompleteText += streamData;
             process.stdout.write(streamData);
@@ -360,7 +360,7 @@ export class Loz {
         } else {
           // OpenAI streaming
           for await (const data of stream) {
-            if (data === null) return;
+            if (data === null) break; // Break instead of return to ensure newline is written
             const streamData = data.choices[0]?.delta?.content || "";
             curCompleteText += streamData;
             process.stdout.write(streamData);

From ecd52f42bf5d069db788fe74a565e730e33da591 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Feb 2026 10:02:45 +0000
Subject: [PATCH 5/5] Improve code clarity with better variable naming

- Rename resolveNext to pendingResolve for clarity
- Use chunks.splice(0) instead of chunks.length = 0
- Address code review style suggestions

Co-authored-by: joone <1979160+joone@users.noreply.github.com>
---
 src/llm/index.ts | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/llm/index.ts b/src/llm/index.ts
index 78dc3c5..2a3fbc4 100644
--- a/src/llm/index.ts
+++ b/src/llm/index.ts
@@ -119,7 +119,7 @@ export class OllamaAPI extends LLMService {
     
     // Create a queue to handle chunks as they arrive
     const chunks: string[] = [];
-    let resolveNext: ((value: IteratorResult<{ response: string }>) => void) | null = null;
+    let pendingResolve: ((value: IteratorResult<{ response: string }>) => void) | null = null;
     let isComplete = false;
     let error: any = null;
     
@@ -130,11 +130,11 @@ export class OllamaAPI extends LLMService {
       (chunk: string) => {
         // Queue the chunk first, then resolve if someone is waiting
         chunks.push(chunk);
-        if (resolveNext) {
+        if (pendingResolve) {
           // Someone is waiting - give them the chunk immediately
           const nextChunk = chunks.shift()!;
-          resolveNext({ value: { response: nextChunk }, done: false });
-          resolveNext = null;
+          pendingResolve({ value: { response: nextChunk }, done: false });
+          pendingResolve = null;
         }
       },
       // contextOutput callback
@@ -146,18 +146,18 @@ export class OllamaAPI extends LLMService {
     ).then(() => {
       isComplete = true;
       // If someone is waiting, notify them we're done
-      if (resolveNext) {
-        resolveNext({ value: undefined, done: true });
-        resolveNext = null;
+      if (pendingResolve) {
+        pendingResolve({ value: undefined, done: true });
+        pendingResolve = null;
       }
     }).catch((err: any) => {
       error = err;
       isComplete = true;
       // Clear the queue on error to avoid returning stale chunks
-      chunks.length = 0;
-      if (resolveNext) {
-        resolveNext({ value: undefined, done: true });
-        resolveNext = null;
+      chunks.splice(0);
+      if (pendingResolve) {
+        pendingResolve({ value: undefined, done: true });
+        pendingResolve = null;
       }
     });
     
@@ -183,7 +183,7 @@ export class OllamaAPI extends LLMService {
           
           // Otherwise, wait for the next chunk
           return new Promise((resolve) => {
-            resolveNext = resolve;
+            pendingResolve = resolve;
           });
         }
       })