99 "slices"
1010 "sort"
1111 "strings"
12+ "time"
1213
1314 openai "github.com/gptscript-ai/chat-completion-client"
1415 "github.com/gptscript-ai/gptscript/pkg/cache"
@@ -212,15 +213,15 @@ func (c *Client) seed(request openai.ChatCompletionRequest) int {
 	return hash.Seed(newRequest)
 }
 
-func (c *Client) fromCache(ctx context.Context, messageRequest types.CompletionRequest, request openai.ChatCompletionRequest) (result []openai.ChatCompletionStreamResponse, _ bool, _ error) {
+func (c *Client) fromCache(ctx context.Context, messageRequest types.CompletionRequest, request openai.ChatCompletionRequest) (result types.CompletionMessage, _ bool, _ error) {
 	if !messageRequest.GetCache() {
-		return nil, false, nil
+		return types.CompletionMessage{}, false, nil
 	}
 	found, err := c.cache.Get(ctx, c.cacheKey(request), &result)
 	if err != nil {
-		return nil, false, err
+		return types.CompletionMessage{}, false, err
 	} else if !found {
-		return nil, false, nil
+		return types.CompletionMessage{}, false, nil
 	}
 	return result, true, nil
 }
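This hunk changes what the cache stores: fromCache now decodes a single types.CompletionMessage and signals a miss with the zero value plus an ok flag, instead of returning a nil slice of stream chunks. A minimal, runnable sketch of that get-or-compute contract, with a hypothetical map-backed store standing in for pkg/cache (all names here are ours, not the project's):

```go
package main

import (
	"context"
	"encoding/json"
	"fmt"
)

// CompletionMessage is a stand-in for types.CompletionMessage.
type CompletionMessage struct {
	Content string
}

// store is a hypothetical stand-in for the pkg/cache client: keys map to JSON.
type store map[string][]byte

// get mirrors the new fromCache contract: on a miss it returns (false, nil)
// and leaves out at its zero value, so callers branch on ok rather than on nil.
func (s store) get(_ context.Context, key string, out *CompletionMessage) (bool, error) {
	raw, found := s[key]
	if !found {
		return false, nil
	}
	return true, json.Unmarshal(raw, out)
}

func main() {
	s := store{}
	var result CompletionMessage
	ok, err := s.get(context.Background(), "request-hash", &result)
	fmt.Println(ok, err, result) // false <nil> {} -> miss, fall through to the API call
}
```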
@@ -396,33 +397,27 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
 			IncludeUsage: true,
 		}
 	}
-	response, ok, err := c.fromCache(ctx, messageRequest, request)
+	result, ok, err := c.fromCache(ctx, messageRequest, request)
 	if err != nil {
 		return nil, err
 	} else if !ok {
-		response, err = c.call(ctx, request, id, status)
+		result, err = c.call(ctx, request, id, status)
 
 		// If we got back a context length exceeded error, keep retrying and shrinking the message history until we pass.
 		var apiError *openai.APIError
 		if errors.As(err, &apiError) && apiError.Code == "context_length_exceeded" && messageRequest.Chat {
 			// Decrease maxTokens by 10% to make garbage collection more aggressive.
 			// The retry loop will further decrease maxTokens if needed.
 			maxTokens := decreaseTenPercent(messageRequest.MaxTokens)
-			response, err = c.contextLimitRetryLoop(ctx, request, id, maxTokens, status)
+			result, err = c.contextLimitRetryLoop(ctx, request, id, maxTokens, status)
 		}
-
 		if err != nil {
 			return nil, err
 		}
 	} else {
 		cacheResponse = true
 	}
 
-	result := types.CompletionMessage{}
-	for _, response := range response {
-		result = appendMessage(result, response)
-	}
-
 	for i, content := range result.Content {
 		if content.ToolCall != nil && content.ToolCall.ID == "" {
 			content.ToolCall.ID = "call_" + hash.ID(content.ToolCall.Function.Name, content.ToolCall.Function.Arguments)[:8]
@@ -440,7 +435,6 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
 
 	status <- types.CompletionStatus{
 		CompletionID: id,
-		Chunks:       response,
 		Response:     result,
 		Usage:        result.Usage,
 		Cached:       cacheResponse,
@@ -449,9 +443,9 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
 	return &result, nil
 }
 
-func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatCompletionRequest, id string, maxTokens int, status chan<- types.CompletionStatus) ([]openai.ChatCompletionStreamResponse, error) {
+func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatCompletionRequest, id string, maxTokens int, status chan<- types.CompletionStatus) (types.CompletionMessage, error) {
 	var (
-		response []openai.ChatCompletionStreamResponse
+		response types.CompletionMessage
 		err      error
 	)
 
@@ -469,10 +463,10 @@ func (c *Client) contextLimitRetryLoop(ctx context.Context, request openai.ChatC
 			maxTokens = decreaseTenPercent(maxTokens)
 			continue
 		}
-		return nil, err
+		return types.CompletionMessage{}, err
 	}
 
-	return nil, err
+	return types.CompletionMessage{}, err
 }
 
 func appendMessage(msg types.CompletionMessage, response openai.ChatCompletionStreamResponse) types.CompletionMessage {
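Most of the retry loop's body sits outside this hunk; what is visible suggests a bounded shrink-and-retry loop that keeps calling the API and cutting the token budget by 10% on each context-length failure. A self-contained sketch under that assumption (decreaseTenPercent as plain integer scaling and the fake call are ours, not the project's exact code):

```go
package main

import (
	"errors"
	"fmt"
)

var errContextLength = errors.New("context_length_exceeded")

// decreaseTenPercent shrinks the token budget; integer math is an assumption.
func decreaseTenPercent(maxTokens int) int {
	return maxTokens * 9 / 10
}

// call stands in for c.call: it fails until the budget fits a pretend window.
func call(maxTokens int) (string, error) {
	if maxTokens > 700 {
		return "", errContextLength
	}
	return "response", nil
}

func main() {
	maxTokens := 1000
	for i := 0; i < 10; i++ { // bounded retries, as in the real loop
		resp, err := call(maxTokens)
		if err == nil {
			fmt.Printf("succeeded with maxTokens=%d: %s\n", maxTokens, resp)
			return
		}
		if errors.Is(err, errContextLength) {
			maxTokens = decreaseTenPercent(maxTokens) // 1000 -> 900 -> 810 -> 729 -> 656
			continue
		}
		fmt.Println("unrecoverable:", err)
		return
	}
}
```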
@@ -548,7 +542,7 @@ func override(left, right string) string {
 	return left
 }
 
-func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest, transactionID string, partial chan<- types.CompletionStatus) (responses []openai.ChatCompletionStreamResponse, _ error) {
+func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest, transactionID string, partial chan<- types.CompletionStatus) (types.CompletionMessage, error) {
 	streamResponse := os.Getenv("GPTSCRIPT_INTERNAL_OPENAI_STREAMING") != "false"
 
 	partial <- types.CompletionStatus{
@@ -565,56 +559,58 @@ func (c *Client) call(ctx context.Context, request openai.ChatCompletionRequest,
 		request.StreamOptions = nil
 		resp, err := c.c.CreateChatCompletion(ctx, request)
 		if err != nil {
-			return nil, err
+			return types.CompletionMessage{}, err
 		}
-		return []openai.ChatCompletionStreamResponse{
-			{
-				ID:      resp.ID,
-				Object:  resp.Object,
-				Created: resp.Created,
-				Model:   resp.Model,
-				Usage:   resp.Usage,
-				Choices: []openai.ChatCompletionStreamChoice{
-					{
-						Index: resp.Choices[0].Index,
-						Delta: openai.ChatCompletionStreamChoiceDelta{
-							Content:      resp.Choices[0].Message.Content,
-							Role:         resp.Choices[0].Message.Role,
-							FunctionCall: resp.Choices[0].Message.FunctionCall,
-							ToolCalls:    resp.Choices[0].Message.ToolCalls,
-						},
-						FinishReason: resp.Choices[0].FinishReason,
+		return appendMessage(types.CompletionMessage{}, openai.ChatCompletionStreamResponse{
+			ID:      resp.ID,
+			Object:  resp.Object,
+			Created: resp.Created,
+			Model:   resp.Model,
+			Usage:   resp.Usage,
+			Choices: []openai.ChatCompletionStreamChoice{
+				{
+					Index: resp.Choices[0].Index,
+					Delta: openai.ChatCompletionStreamChoiceDelta{
+						Content:      resp.Choices[0].Message.Content,
+						Role:         resp.Choices[0].Message.Role,
+						FunctionCall: resp.Choices[0].Message.FunctionCall,
+						ToolCalls:    resp.Choices[0].Message.ToolCalls,
 					},
+					FinishReason: resp.Choices[0].FinishReason,
 				},
 			},
-		}, nil
+		}), nil
 	}
 
 	stream, err := c.c.CreateChatCompletionStream(ctx, request)
 	if err != nil {
-		return nil, err
+		return types.CompletionMessage{}, err
 	}
 	defer stream.Close()
 
-	var partialMessage types.CompletionMessage
+	var (
+		partialMessage types.CompletionMessage
+		start          = time.Now()
+		last           []string
+	)
 	for {
 		response, err := stream.Recv()
 		if err == io.EOF {
-			return responses, c.cache.Store(ctx, c.cacheKey(request), responses)
+			return partialMessage, c.cache.Store(ctx, c.cacheKey(request), partialMessage)
 		} else if err != nil {
-			return nil, err
-		}
-		if len(response.Choices) > 0 {
-			slog.Debug("stream", "content", response.Choices[0].Delta.Content)
+			return types.CompletionMessage{}, err
 		}
+		partialMessage = appendMessage(partialMessage, response)
 		if partial != nil {
-			partialMessage = appendMessage(partialMessage, response)
-			partial <- types.CompletionStatus{
-				CompletionID:    transactionID,
-				PartialResponse: &partialMessage,
+			if time.Since(start) > 500*time.Millisecond {
+				last = last[:0]
+				partial <- types.CompletionStatus{
+					CompletionID:    transactionID,
+					PartialResponse: &partialMessage,
+				}
+				start = time.Now()
 			}
 		}
-		responses = append(responses, response)
 	}
 }
 
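The streaming branch now folds every chunk into partialMessage as it arrives and emits a partial status update at most once every 500ms, instead of forwarding every chunk. A self-contained sketch of that throttle pattern (the channel, chunk source, and timings here are ours, chosen only to make the cadence observable):

```go
package main

import (
	"fmt"
	"strings"
	"time"
)

func main() {
	partial := make(chan string, 16)
	done := make(chan struct{})
	go func() {
		for msg := range partial {
			fmt.Printf("partial update (%d bytes)\n", len(msg))
		}
		close(done)
	}()

	var sb strings.Builder
	start := time.Now()
	for i := 0; i < 40; i++ { // stand-in for stream.Recv() deltas
		sb.WriteString("token ")
		// Only emit a status update if 500ms have passed since the last one,
		// capping update traffic no matter how fast chunks arrive.
		if time.Since(start) > 500*time.Millisecond {
			partial <- sb.String()
			start = time.Now()
		}
		time.Sleep(25 * time.Millisecond)
	}
	close(partial)
	<-done // the complete message is still returned to the caller at the end
}
```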