@@ -4,10 +4,10 @@ import {
44 Chunk ,
55 CompletionOptions ,
66 LLMOptions ,
7- MessageContent ,
87} from "../../index.js" ;
98import { renderChatMessage , stripImages } from "../../util/messageContent.js" ;
109import { BaseLLM } from "../index.js" ;
10+ import { DEFAULT_REASONING_TOKENS } from "../constants.js" ;
1111
1212class Cohere extends BaseLLM {
1313 static providerName = "cohere" ;
@@ -19,7 +19,6 @@ class Cohere extends BaseLLM {
1919
2020 private _convertMessages ( msgs : ChatMessage [ ] ) : any [ ] {
2121 const messages = [ ] ;
22- let lastToolPlan : MessageContent | undefined ;
2322 for ( const m of msgs ) {
2423 if ( ! m . content ) {
2524 continue ;
@@ -48,36 +47,48 @@ class Cohere extends BaseLLM {
4847 } ) ;
4948 break ;
5049 case "thinking" :
51- lastToolPlan = m . content ;
50+ messages . push ( {
51+ role : "assistant" ,
52+ content : [
53+ {
54+ type : "thinking" ,
55+ thinking : m . content ,
56+ } ,
57+ ] ,
58+ } ) ;
5259 break ;
5360 case "assistant" :
61+ let msg : any ;
62+ if ( messages . at ( - 1 ) ?. content [ 0 ] ?. thinking ) {
63+ msg = messages . pop ( ) ;
64+ } else {
65+ msg = {
66+ role : m . role ,
67+ content : [ ] ,
68+ } ;
69+ }
70+
5471 if ( m . toolCalls ) {
55- if ( ! lastToolPlan ) {
56- throw new Error ( "No tool plan found" ) ;
72+ msg . tool_calls = m . toolCalls . map ( ( toolCall ) => ( {
73+ id : toolCall . id ,
74+ type : "function" ,
75+ function : {
76+ name : toolCall . function ?. name ,
77+ arguments : toolCall . function ?. arguments ,
78+ } ,
79+ } ) ) ;
80+ } else {
81+ if ( typeof m . content === "string" ) {
82+ msg . content . push ( {
83+ type : "text" ,
84+ text : m . content ,
85+ } ) ;
86+ } else {
87+ msg . content . push ( ...m . content ) ;
5788 }
58- messages . push ( {
59- role : m . role ,
60- tool_calls : m . toolCalls . map ( ( toolCall ) => ( {
61- id : toolCall . id ,
62- type : "function" ,
63- function : {
64- name : toolCall . function ?. name ,
65- arguments : toolCall . function ?. arguments ,
66- } ,
67- } ) ) ,
68- // Ideally the tool plan would be in this message, but it is
69- // split in another, usually the previous, this one's content is
70- // a space.
71- // tool_plan: m.content,
72- tool_plan : lastToolPlan ,
73- } ) ;
74- lastToolPlan = undefined ;
75- break ;
7689 }
77- messages . push ( {
78- role : m . role ,
79- content : m . content ,
80- } ) ;
90+
91+ messages . push ( msg ) ;
8192 break ;
8293 case "system" :
8394 messages . push ( {
@@ -110,6 +121,15 @@ class Cohere extends BaseLLM {
110121 stop_sequences : options . stop ?. slice ( 0 , Cohere . maxStopSequences ) ,
111122 frequency_penalty : options . frequencyPenalty ,
112123 presence_penalty : options . presencePenalty ,
124+ thinking : options . reasoning
125+ ? {
126+ type : "enabled" as const ,
127+ token_budget :
128+ options . reasoningBudgetTokens ?? DEFAULT_REASONING_TOKENS ,
129+ }
130+ : // Reasoning is enabled by default for models that support it.
131+ // https://docs.cohere.com/reference/chat-stream#request.body.thinking
132+ { type : "disabled" as const } ,
113133 tools : options . tools ?. map ( ( tool ) => ( {
114134 type : "function" ,
115135 function : {
@@ -159,14 +179,17 @@ class Cohere extends BaseLLM {
159179
160180 if ( options . stream === false ) {
161181 const data = await resp . json ( ) ;
182+ for ( const content of data . message . content ) {
183+ if ( content . thinking ) {
184+ yield { role : "thinking" , content : content . thinking } ;
185+ continue ;
186+ }
187+ yield { role : "assistant" , content : content . text } ;
188+ }
162189 if ( data . message . tool_calls ) {
163- yield {
164- // Use the "thinking" role for `tool_plan`, since there is no such
165- // role in the Cohere API at the moment and it is a "a
166- // chain-of-thought style reflection".
167- role : "thinking" ,
168- content : data . message . tool_plan ,
169- } ;
190+ if ( data . message . tool_plan ) {
191+ yield { role : "thinking" , content : data . message . tool_plan } ;
192+ }
170193 yield {
171194 role : "assistant" ,
172195 content : "" ,
@@ -181,7 +204,6 @@ class Cohere extends BaseLLM {
181204 } ;
182205 return ;
183206 }
184- yield { role : "assistant" , content : data . message . content [ 0 ] . text } ;
185207 return ;
186208 }
187209
@@ -192,16 +214,20 @@ class Cohere extends BaseLLM {
192214 switch ( value . type ) {
193215 // https://docs.cohere.com/v2/docs/streaming#content-delta
194216 case "content-delta" :
217+ if ( value . delta . message . content . thinking ) {
218+ yield {
219+ role : "thinking" ,
220+ content : value . delta . message . content . thinking ,
221+ } ;
222+ break ;
223+ }
195224 yield {
196225 role : "assistant" ,
197226 content : value . delta . message . content . text ,
198227 } ;
199228 break ;
200229 // https://docs.cohere.com/reference/chat-stream#request.body.messages.assistant.tool_plan
201230 case "tool-plan-delta" :
202- // Use the "thinking" role for `tool_plan`, since there is no such
203- // role in the Cohere API at the moment and it is a "a
204- // chain-of-thought style reflection".
205231 yield {
206232 role : "thinking" ,
207233 content : value . delta . message . tool_plan ,