@@ -4,10 +4,10 @@ import {
44 Chunk ,
55 CompletionOptions ,
66 LLMOptions ,
7- MessageContent ,
87} from "../../index.js" ;
98import { renderChatMessage , stripImages } from "../../util/messageContent.js" ;
109import { BaseLLM } from "../index.js" ;
10+ import { DEFAULT_REASONING_TOKENS } from "../constants.js" ;
1111
1212class Cohere extends BaseLLM {
1313 static providerName = "cohere" ;
@@ -19,7 +19,6 @@ class Cohere extends BaseLLM {
1919
2020 private _convertMessages ( msgs : ChatMessage [ ] ) : any [ ] {
2121 const messages = [ ] ;
22- let lastToolPlan : MessageContent | undefined ;
2322 for ( const m of msgs ) {
2423 if ( ! m . content ) {
2524 continue ;
@@ -48,36 +47,44 @@ class Cohere extends BaseLLM {
4847 } ) ;
4948 break ;
5049 case "thinking" :
51- lastToolPlan = m . content ;
50+ messages . push ( {
51+ role : "assistant" ,
52+ content : [
53+ {
54+ type : "thinking" ,
55+ thinking : m . content ,
56+ } ,
57+ ] ,
58+ } ) ;
5259 break ;
5360 case "assistant" :
54- if ( m . toolCalls ) {
55- if ( ! lastToolPlan ) {
56- throw new Error ( "No tool plan found" ) ;
57- }
58- messages . push ( {
61+ let msg : any ;
62+ if ( messages . at ( - 1 ) ?. content [ 0 ] ?. thinking ) {
63+ msg = messages . pop ( ) ;
64+ } else {
65+ msg = {
5966 role : m . role ,
60- tool_calls : m . toolCalls . map ( ( toolCall ) => ( {
61- id : toolCall . id ,
62- type : "function" ,
63- function : {
64- name : toolCall . function ?. name ,
65- arguments : toolCall . function ?. arguments ,
66- } ,
67- } ) ) ,
68- // Ideally the tool plan would be in this message, but it is
69- // split in another, usually the previous, this one's content is
70- // a space.
71- // tool_plan: m.content,
72- tool_plan : lastToolPlan ,
67+ content : [ ] ,
68+ } ;
69+ }
70+
71+ if ( m . toolCalls ) {
72+ msg . tool_calls = m . toolCalls . map ( ( toolCall ) => ( {
73+ id : toolCall . id ,
74+ type : "function" ,
75+ function : {
76+ name : toolCall . function ?. name ,
77+ arguments : toolCall . function ?. arguments ,
78+ } ,
79+ } ) ) ;
80+ } else {
81+ msg . content . push ( {
82+ type : "text" ,
83+ text : m . content ,
7384 } ) ;
74- lastToolPlan = undefined ;
75- break ;
7685 }
77- messages . push ( {
78- role : m . role ,
79- content : m . content ,
80- } ) ;
86+
87+ messages . push ( msg ) ;
8188 break ;
8289 case "system" :
8390 messages . push ( {
@@ -110,6 +117,15 @@ class Cohere extends BaseLLM {
110117 stop_sequences : options . stop ?. slice ( 0 , Cohere . maxStopSequences ) ,
111118 frequency_penalty : options . frequencyPenalty ,
112119 presence_penalty : options . presencePenalty ,
120+ thinking : options . reasoning
121+ ? {
122+ type : "enabled" as const ,
123+ token_budget :
124+ options . reasoningBudgetTokens ?? DEFAULT_REASONING_TOKENS ,
125+ }
126+ : // Reasoning is enabled by default for models that support it.
127+ // https://docs.cohere.com/reference/chat-stream#request.body.thinking
128+ { type : "disabled" as const } ,
113129 tools : options . tools ?. map ( ( tool ) => ( {
114130 type : "function" ,
115131 function : {
@@ -159,14 +175,17 @@ class Cohere extends BaseLLM {
159175
160176 if ( options . stream === false ) {
161177 const data = await resp . json ( ) ;
178+ for ( const content of data . message . content ) {
179+ if ( content . thinking ) {
180+ yield { role : "thinking" , content : content . thinking } ;
181+ continue ;
182+ }
183+ yield { role : "assistant" , content : content . text } ;
184+ }
162185 if ( data . message . tool_calls ) {
163- yield {
164- // Use the "thinking" role for `tool_plan`, since there is no such
165- // role in the Cohere API at the moment and it is a "a
166- // chain-of-thought style reflection".
167- role : "thinking" ,
168- content : data . message . tool_plan ,
169- } ;
186+ if ( data . message . tool_plan ) {
187+ yield { role : "thinking" , content : data . message . tool_plan } ;
188+ }
170189 yield {
171190 role : "assistant" ,
172191 content : "" ,
@@ -181,7 +200,6 @@ class Cohere extends BaseLLM {
181200 } ;
182201 return ;
183202 }
184- yield { role : "assistant" , content : data . message . content [ 0 ] . text } ;
185203 return ;
186204 }
187205
@@ -192,16 +210,20 @@ class Cohere extends BaseLLM {
192210 switch ( value . type ) {
193211 // https://docs.cohere.com/v2/docs/streaming#content-delta
194212 case "content-delta" :
213+ if ( value . delta . message . content . thinking ) {
214+ yield {
215+ role : "thinking" ,
216+ content : value . delta . message . content . thinking ,
217+ } ;
218+ break ;
219+ }
195220 yield {
196221 role : "assistant" ,
197222 content : value . delta . message . content . text ,
198223 } ;
199224 break ;
200225 // https://docs.cohere.com/reference/chat-stream#request.body.messages.assistant.tool_plan
201226 case "tool-plan-delta" :
202- // Use the "thinking" role for `tool_plan`, since there is no such
203- // role in the Cohere API at the moment and it is a "a
204- // chain-of-thought style reflection".
205227 yield {
206228 role : "thinking" ,
207229 content : value . delta . message . tool_plan ,
0 commit comments