@@ -117,98 +117,98 @@ Llama Stack agent is capable of running multi-turn inference using both customiz
117117
118118Create the agent configuration:
119119```
120- val agentConfig =
121- AgentConfig.builder()
122- .enableSessionPersistence(false)
123- .instructions("You're a helpful assistant")
124- .maxInferIters(100)
125- .model("meta-llama/Llama-3.1-8B-Instruct")
126- .samplingParams(
127- SamplingParams.builder()
128- .strategy(
129- SamplingParams.Strategy.ofGreedySampling()
130- )
131- .build()
132- )
133- .toolChoice(AgentConfig.ToolChoice.AUTO)
134- .toolPromptFormat(AgentConfig.ToolPromptFormat.JSON)
135- .clientTools(
136- listOf(
137- CustomTools.getCreateCalendarEventTool() #Custom local tools
138- )
120+ val agentConfig =
121+ AgentConfig.builder()
122+ .enableSessionPersistence(false)
123+ .instructions("You're a helpful assistant")
124+ .maxInferIters(100)
125+ .model("meta-llama/Llama-3.1-8B-Instruct")
126+ .samplingParams(
127+ SamplingParams.builder()
128+ .strategy(
129+ SamplingParams.Strategy.ofGreedySampling()
139130 )
140131 .build()
132+ )
133+ .toolChoice(AgentConfig.ToolChoice.AUTO)
134+ .toolPromptFormat(AgentConfig.ToolPromptFormat.JSON)
135+ .clientTools(
136+ listOf(
137+ CustomTools.getCreateCalendarEventTool() // Custom local tools
138+ )
139+ )
140+ .build()
141141```
142142
143143Create the agent:
144144```
145- val agentService = client!!.agents()
146- val agentCreateResponse = agentService.create(
147- AgentCreateParams.builder()
148- .agentConfig(agentConfig)
149- .build(),
150- )
151- val agentId = agentCreateResponse.agentId()
145+ val agentService = client!!.agents()
146+ val agentCreateResponse = agentService.create(
147+ AgentCreateParams.builder()
148+ .agentConfig(agentConfig)
149+ .build(),
150+ )
151+ val agentId = agentCreateResponse.agentId()
152152```
153153
154154Create the session:
155155```
156- val sessionService = agentService.session()
157- val agentSessionCreateResponse = sessionService.create(
158- AgentSessionCreateParams.builder()
159- .agentId(agentId)
160- .sessionName("test-session")
161- .build()
162- )
156+ val sessionService = agentService.session()
157+ val agentSessionCreateResponse = sessionService.create(
158+ AgentSessionCreateParams.builder()
159+ .agentId(agentId)
160+ .sessionName("test-session")
161+ .build()
162+ )
163163
164- val sessionId = agentSessionCreateResponse.sessionId()
164+ val sessionId = agentSessionCreateResponse.sessionId()
165165```
166166
167167Create a turn:
168168```
169- val turnService = agentService.turn()
170- val agentTurnCreateResponseStream = turnService.createStreaming(
171- AgentTurnCreateParams.builder()
172- .agentId(agentId)
173- .messages(
174- listOf(
175- AgentTurnCreateParams.Message.ofUser(
176- UserMessage.builder()
177- .content(InterleavedContent.ofString("What is the capital of France?"))
178- .build()
179- )
169+ val turnService = agentService.turn()
170+ val agentTurnCreateResponseStream = turnService.createStreaming(
171+ AgentTurnCreateParams.builder()
172+ .agentId(agentId)
173+ .messages(
174+ listOf(
175+ AgentTurnCreateParams.Message.ofUser(
176+ UserMessage.builder()
177+ .content(InterleavedContent.ofString("What is the capital of France?"))
178+ .build()
180179 )
181- .sessionId(sessionId)
182- .build()
183- )
180+ )
181+ .sessionId(sessionId)
182+ .build()
183+ )
184184```
185185
186186Handle the stream chunk callback:
187187```
188- agentTurnCreateResponseStream.use {
189- agentTurnCreateResponseStream.asSequence().forEach {
190- val agentResponsePayload = it.responseStreamChunk()?.event()?.payload()
191- if (agentResponsePayload != null) {
192- when {
193- agentResponsePayload.isAgentTurnResponseTurnStart() -> {
194- // Handle Turn Start Payload
195- }
196- agentResponsePayload.isAgentTurnResponseStepStart() -> {
197- // Handle Step Start Payload
198- }
199- agentResponsePayload.isAgentTurnResponseStepProgress() -> {
200- // Handle Step Progress Payload
201- }
202- agentResponsePayload.isAgentTurnResponseStepComplete() -> {
203- // Handle Step Complete Payload
204- }
205- agentResponsePayload.isAgentTurnResponseTurnComplete() -> {
206- // Handle Turn Complete Payload
207- }
208- }
188+ agentTurnCreateResponseStream.use {
189+ agentTurnCreateResponseStream.asSequence().forEach {
190+ val agentResponsePayload = it.responseStreamChunk()?.event()?.payload()
191+ if (agentResponsePayload != null) {
192+ when {
193+ agentResponsePayload.isAgentTurnResponseTurnStart() -> {
194+ // Handle Turn Start Payload
195+ }
196+ agentResponsePayload.isAgentTurnResponseStepStart() -> {
197+ // Handle Step Start Payload
198+ }
199+ agentResponsePayload.isAgentTurnResponseStepProgress() -> {
200+ // Handle Step Progress Payload
201+ }
202+ agentResponsePayload.isAgentTurnResponseStepComplete() -> {
203+ // Handle Step Complete Payload
204+ }
205+ agentResponsePayload.isAgentTurnResponseTurnComplete() -> {
206+ // Handle Turn Complete Payload
209207 }
210208 }
211209 }
210+ }
211+ }
212212```
213213
214214#### Local
@@ -307,58 +307,58 @@ For the local module, we expect the embedding generation to be done on the Andro
307307
308308Create vectorDB instance:
309309```
310- val vectorDbId = UUID.randomUUID().toString()
311- client!!.vectorDbs().register(
312- VectorDbRegisterParams.builder()
313- .vectorDbId(vectorDbId)
314- .embeddingModel("not_required")
315- .build()
316- )
310+ val vectorDbId = UUID.randomUUID().toString()
311+ client!!.vectorDbs().register(
312+ VectorDbRegisterParams.builder()
313+ .vectorDbId(vectorDbId)
314+ .embeddingModel("not_required")
315+ .build()
316+ )
317317```
318318
319319Create chunks (supports single document):
320320```
321- val document = Document.builder()
322- .documentId("1")
323- .content(text) // text is a string of the entire contents of the document. Done by the Android app
324- .metadata(Document.Metadata.builder().build())
325- .build()
326- val tagToolParams = ToolRuntimeRagToolInsertParams.builder()
327- .vectorDbId(vectorDbId)
328- .chunkSizeInTokens(chunkSizeInWords)
329- .documents(listOf(document))
330- .build();
331- val ragtool = client!!.toolRuntime().ragTool() as RagToolServiceLocalImpl
332- val chunks = ragtool.createChunks(tagToolParams)
321+ val document = Document.builder()
322+ .documentId("1")
323+ .content(text) // text is a string of the entire contents of the document. Done by the Android app
324+ .metadata(Document.Metadata.builder().build())
325+ .build()
326+ val tagToolParams = ToolRuntimeRagToolInsertParams.builder()
327+ .vectorDbId(vectorDbId)
328+ .chunkSizeInTokens(chunkSizeInWords)
329+ .documents(listOf(document))
330+ .build();
331+ val ragtool = client!!.toolRuntime().ragTool() as RagToolServiceLocalImpl
332+ val chunks = ragtool.createChunks(tagToolParams)
333333```
334334
335335Generate embeddings for chunks: ***Done in Android App***
336336
337337Store embedding chunks in Vector DB:
338338```
339- ragtool.insert(vectorDbId, embeddings, chunks)
339+ ragtool.insert(vectorDbId, embeddings, chunks)
340340```
341341
342342Generate embeddings for the user prompt: ***Done in Android App***
343343
344344Add the RAG tool group to turnParams so that the agent can call the RAG tool (see inline comments for more information):
345345
346346```
347- turnParams.addToolgroup(
348- AgentTurnCreateParams.Toolgroup.ofAgentToolGroupWithArgs(
349- AgentTurnCreateParams.Toolgroup.AgentToolGroupWithArgs.builder()
350- .name("builtin::rag/knowledge_search") // Tool name
351- .args(
352- AgentTurnCreateParams.Toolgroup.AgentToolGroupWithArgs.Args.builder()
353- .putAdditionalProperty("vector_db_id", JsonValue.from(vectorDbId))
354- .putAdditionalProperty("ragUserPromptEmbedded", JsonValue.from(ragUserPromptEmbedded)) // Embedded user prompt
355- .putAdditionalProperty("maxNeighborCount", JsonValue.from(3)) // # of similar neighbors to retrieve from Vector DB.
356- .putAdditionalProperty("ragInstruction", JsonValue.from(localRagSystemPrompt())) // RAG system prompt provided from Android app
357- .build()
358- )
359- .build()
360- )
347+ turnParams.addToolgroup(
348+ AgentTurnCreateParams.Toolgroup.ofAgentToolGroupWithArgs(
349+ AgentTurnCreateParams.Toolgroup.AgentToolGroupWithArgs.builder()
350+ .name("builtin::rag/knowledge_search") // Tool name
351+ .args(
352+ AgentTurnCreateParams.Toolgroup.AgentToolGroupWithArgs.Args.builder()
353+ .putAdditionalProperty("vector_db_id", JsonValue.from(vectorDbId))
354+ .putAdditionalProperty("ragUserPromptEmbedded", JsonValue.from(ragUserPromptEmbedded)) // Embedded user prompt
355+ .putAdditionalProperty("maxNeighborCount", JsonValue.from(3)) // # of similar neighbors to retrieve from Vector DB.
356+ .putAdditionalProperty("ragInstruction", JsonValue.from(localRagSystemPrompt())) // RAG system prompt provided from Android app
357+ .build()
361358 )
359+ .build()
360+ )
361+ )
362362
363363// Now create a turn and handle response like in Agent section
364364
@@ -381,32 +381,32 @@ The Kotlin SDK also supports single image inference where the image can be a HTT
381381Create an image inference with agent:
382382
383383```
384- val agentTurnCreateResponseStream =
385- turnService.createStreaming(
386- AgentTurnCreateParams.builder()
387- .agentId(agentId)
388- .messages(
389- listOf(
390- AgentTurnCreateParams.Message.ofUser(
391- UserMessage.builder()
392- .content(InterleavedContent.ofString("What is in the image?"))
393- .build()
394- ),
395- AgentTurnCreateParams.Message.ofUser(
396- UserMessage.builder()
397- .content(InterleavedContent.ofImageContentItem(
398- InterleavedContent.ImageContentItem.builder()
399- .image(image)
400- .type(JsonValue.from("image"))
401- .build()
402- ))
384+ val agentTurnCreateResponseStream =
385+ turnService.createStreaming(
386+ AgentTurnCreateParams.builder()
387+ .agentId(agentId)
388+ .messages(
389+ listOf(
390+ AgentTurnCreateParams.Message.ofUser(
391+ UserMessage.builder()
392+ .content(InterleavedContent.ofString("What is in the image?"))
393+ .build()
394+ ),
395+ AgentTurnCreateParams.Message.ofUser(
396+ UserMessage.builder()
397+ .content(InterleavedContent.ofImageContentItem(
398+ InterleavedContent.ImageContentItem.builder()
399+ .image(image)
400+ .type(JsonValue.from("image"))
403401 .build()
404- )
405- )
402+ ))
403+ .build( )
406404 )
407- .sessionId(sessionId)
408- .build()
405+ )
409406 )
407+ .sessionId(sessionId)
408+ .build()
409+ )
410410```
411411
412412Note that an image captured on the device needs to be Base64-encoded before it is sent to the model. Check out our demo app example [here](https://github.com/meta-llama/llama-stack-apps/tree/main/examples/android_app).
0 commit comments