diff --git a/pkg/openai/client.go b/pkg/openai/client.go
index 7715c657..3101e633 100644
--- a/pkg/openai/client.go
+++ b/pkg/openai/client.go
@@ -666,11 +666,17 @@ func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest,
 			},
 		}), nil
 	}
-
 	stream, err := c.c.CreateChatCompletionStream(ctx, request, headers, retryOpts...)
 	if err != nil {
 		if errors.Is(err, context.Canceled) {
-			err = nil
+			return types.CompletionMessage{
+				Content: []types.ContentPart{
+					{
+						Text: "User aborted the chat before the model could respond",
+					},
+				},
+				Role: types.CompletionMessageRoleTypeAssistant,
+			}, nil
 		}
 		return types.CompletionMessage{}, err
 	}
@@ -683,6 +689,11 @@ func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest,
 	for {
 		response, err := stream.Recv()
 		if errors.Is(err, io.EOF) || errors.Is(err, context.Canceled) {
+			if len(partialMessage.Content) > 0 && partialMessage.Content[0].Text == "" {
+				// Insert placeholder text if the LLM doesn't respond or the user cancels the stream before any output.
+				// Anthropic models otherwise reject the request because assistant messages must have non-empty content.
+				partialMessage.Content[0].Text = "User aborted the chat or chat finished before the LLM could respond"
+			}
			// If the stream is finished, either because we got an EOF or the context was canceled,
			// then we're done. The cache won't save the response if the context was canceled.
			return partialMessage, c.cache.Store(ctx, c.cacheKey(request), partialMessage)
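
For reviewers: a minimal, self-contained sketch of the pattern this patch applies, i.e. substituting placeholder text when the stream ends or is canceled before any content has accumulated. The `drain` helper, `message` struct, and the recv callback below are hypothetical stand-ins for illustration, not types from this client.

```go
// Sketch of the cancellation-handling pattern in isolation: drain a stream,
// and if the context is canceled (or the stream ends) before any text has
// accumulated, substitute placeholder text so downstream consumers never see
// an empty assistant message.
package main

import (
	"context"
	"errors"
	"fmt"
	"io"
	"time"
)

type message struct {
	Role string
	Text string
}

// drain accumulates chunks from recv until EOF or cancellation.
func drain(ctx context.Context, recv func() (string, error)) message {
	msg := message{Role: "assistant"}
	for {
		chunk, err := recv()
		if errors.Is(err, io.EOF) || errors.Is(err, context.Canceled) || ctx.Err() != nil {
			if msg.Text == "" {
				// Mirror the patch: never return an empty assistant message.
				msg.Text = "User aborted the chat before the model could respond"
			}
			return msg
		}
		msg.Text += chunk
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // simulate the user aborting immediately

	msg := drain(ctx, func() (string, error) {
		// A real stream would block on the network; here we just honor the context.
		select {
		case <-ctx.Done():
			return "", context.Canceled
		case <-time.After(10 * time.Millisecond):
			return "hello ", nil
		}
	})
	fmt.Printf("%s: %q\n", msg.Role, msg.Text)
}
```

Returning a non-empty placeholder instead of swallowing the error with an empty message keeps subsequent requests valid for providers (such as Anthropic, per the in-diff comment) that reject assistant messages with empty content.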