@@ -2752,10 +2752,10 @@ def _create_bitmap_from_bytes(self, image_bytes: bytes):
             (ctypes.c_uint8 * len(image_bytes)).from_buffer(bytearray(image_bytes)),
             len(image_bytes)
         )
-
+
         if bitmap is None:
             raise ValueError("Failed to create bitmap from image bytes")
-
+
         return bitmap
 
     def __call__(
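
A note on the ctypes idiom in `_create_bitmap_from_bytes` above: `from_buffer` requires a writable buffer, which is why the immutable `bytes` payload is first wrapped in a `bytearray`. A minimal standalone sketch (the sample payload is a placeholder):

```python
import ctypes

# Wrap immutable bytes in a mutable bytearray so from_buffer() can borrow
# its memory without copying; the resulting C array aliases that memory.
image_bytes = b"\x89PNG\r\n\x1a\n"  # placeholder payload
buf = bytearray(image_bytes)
c_array = (ctypes.c_uint8 * len(buf)).from_buffer(buf)
# Pass c_array together with len(buf) to the C API, and keep buf alive
# for as long as the C side may read from it.
```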
@@ -2814,18 +2814,18 @@ def __call__(
             trim_blocks=True,
             lstrip_blocks=True,
         ).from_string(self.CHAT_FORMAT)
-
+
         # Get the default media marker
         media_marker = self._mtmd_cpp.mtmd_default_marker().decode('utf-8')
-
+
         # Render the chat template
         text = template.render(
             messages=messages,
             add_generation_prompt=True,
             eos_token=llama.detokenize([llama.token_eos()]),
             bos_token=llama.detokenize([llama.token_bos()]),
         )
-
+
         # Replace image URLs in text with media markers
         for image_url in image_urls:
             text = text.replace(image_url, media_marker)
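
To make the substitution above concrete, here is an illustrative before/after. The marker value shown is an assumption; the real one comes from `mtmd_default_marker()`:

```python
# Hypothetical values for illustration only.
media_marker = "<__media__>"  # assumed return of mtmd_default_marker()
image_urls = ["https://example.com/cat.png"]
text = "<start_of_turn>user\nDescribe https://example.com/cat.png<end_of_turn>\n"

for image_url in image_urls:
    text = text.replace(image_url, media_marker)

# text is now "<start_of_turn>user\nDescribe <__media__><end_of_turn>\n",
# which the mtmd tokenizer can split into text and media chunks.
```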
@@ -2875,40 +2875,40 @@ def __call__(
         # Process each chunk
         n_past = llama_cpp.llama_pos(0)
         n_chunks = self._mtmd_cpp.mtmd_input_chunks_size(chunks)
-
+
         for i in range(n_chunks):
             chunk = self._mtmd_cpp.mtmd_input_chunks_get(chunks, i)
             if chunk is None:
                 continue
 
             chunk_type = self._mtmd_cpp.mtmd_input_chunk_get_type(chunk)
-
+
             if chunk_type == self._mtmd_cpp.MTMD_INPUT_CHUNK_TYPE_TEXT:
                 # Handle text chunk
                 n_tokens_out = ctypes.c_size_t()
                 tokens_ptr = self._mtmd_cpp.mtmd_input_chunk_get_tokens_text(
                     chunk, ctypes.byref(n_tokens_out)
                 )
-
+
                 if tokens_ptr and n_tokens_out.value > 0:
                     # Convert ctypes array to Python list
                     tokens = [tokens_ptr[j] for j in range(n_tokens_out.value)]
-
+
                     if llama.n_tokens + len(tokens) > llama.n_ctx():
                         raise ValueError(
                             f"Prompt exceeds n_ctx: {llama.n_tokens + len(tokens)} > {llama.n_ctx()}"
                         )
                     llama.eval(tokens)
-
+
             elif chunk_type in [self._mtmd_cpp.MTMD_INPUT_CHUNK_TYPE_IMAGE, self._mtmd_cpp.MTMD_INPUT_CHUNK_TYPE_AUDIO]:
                 # Handle image/audio chunk using helper
                 chunk_n_tokens = self._mtmd_cpp.mtmd_input_chunk_get_n_tokens(chunk)
-
+
                 if llama.n_tokens + chunk_n_tokens > llama.n_ctx():
                     raise ValueError(
                         f"Prompt exceeds n_ctx: {llama.n_tokens + chunk_n_tokens} > {llama.n_ctx()}"
                     )
-
+
                 new_n_past = llama_cpp.llama_pos(0)
                 result = self._mtmd_cpp.mtmd_helper_eval_chunk_single(
                     self.mtmd_ctx,
@@ -2920,10 +2920,10 @@ def __call__(
                     False,  # logits_last
                     ctypes.byref(new_n_past)
                 )
-
+
                 if result != 0:
                     raise ValueError(f"Failed to evaluate chunk: error code {result}")
-
+
                 # Update llama's token count
                 llama.n_tokens = new_n_past.value
 
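
The `n_ctx` checks above run per chunk, as each chunk is evaluated. A hedged sketch of a variant that sums all chunk tokens up front so a single check can happen before any evaluation; the wrapper function itself is hypothetical, but it uses only the binding calls already shown in this hunk:

```python
def total_chunk_tokens(mtmd_cpp, chunks) -> int:
    """Sum token counts across all chunks so one n_ctx check can be
    made before any chunk is evaluated."""
    total = 0
    for i in range(mtmd_cpp.mtmd_input_chunks_size(chunks)):
        chunk = mtmd_cpp.mtmd_input_chunks_get(chunks, i)
        if chunk is not None:
            total += mtmd_cpp.mtmd_input_chunk_get_n_tokens(chunk)
    return total
```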
@@ -3013,34 +3013,14 @@ def __call__(
             grammar=grammar,
             logit_bias=logit_bias,
         )
-
+
         if tool is not None:
             tool_name = tool["function"]["name"]
             return _convert_completion_to_chat_function(
                 tool_name, completion_or_chunks, stream
             )
         return _convert_completion_to_chat(completion_or_chunks, stream=stream)
 
-    def eval_image(self, llama: llama.Llama, image_url: str):
-        image_bytes = self.load_image(image_url)
-        embed = self._embed_image_bytes(image_bytes, llama.context_params.n_threads_batch)
-        if llama.n_tokens + embed.contents.n_image_pos > llama.n_ctx():
-            raise ValueError(
-                f"Prompt exceeds n_ctx: {llama.n_tokens + embed.contents.n_image_pos} > {llama.n_ctx()}"
-            )
-        n_past = ctypes.c_int(llama.n_tokens)
-        n_past_p = ctypes.pointer(n_past)
-        with suppress_stdout_stderr(disable=self.verbose):
-            self._llava_cpp.llava_eval_image_embed(
-                llama.ctx,
-                embed,
-                llama.n_batch,
-                n_past_p,
-            )
-        # Required to avoid issues with hf tokenizer
-        llama.input_ids[llama.n_tokens : n_past.value] = -1
-        llama.n_tokens = n_past.value
-
     @staticmethod
     def _load_image(image_url: str) -> bytes:
         # TODO: Add Pillow support for other image formats beyond (jpg, png)
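
The `_load_image` helper above returns raw bytes for the URL forms the handlers accept. A minimal sketch of the kind of loader this implies (assumed behavior, not the exact implementation; per the TODO, Pillow support is still missing):

```python
import base64
import urllib.request

def load_image_bytes(image_url: str) -> bytes:
    # Base64 data URI, e.g. "data:image/png;base64,iVBORw0..."
    if image_url.startswith("data:"):
        return base64.b64decode(image_url.split(",", 1)[1])
    # Otherwise treat it as a fetchable URL.
    with urllib.request.urlopen(image_url) as f:
        return f.read()
```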
@@ -3533,6 +3513,58 @@ def __call__(self, **kwargs):
         return super().__call__(**kwargs)
 
 
+class Gemma3ChatHandler(Llava15ChatHandler):
+    # Chat Format:
+    # '<bos><start_of_turn>user\n{system_prompt}\n\n{prompt}<end_of_turn>\n<start_of_turn>model\n'
+
+    DEFAULT_SYSTEM_MESSAGE = None
+
+    CHAT_FORMAT = (
+        "{{ '<bos>' }}"
+        "{% if messages[0]['role'] == 'system' %}"
+        "{% if messages[0]['content'] is string %}"
+        "{% set first_user_prefix = messages[0]['content'] + '\n\n' %}"
+        "{% else %}"
+        "{% set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' %}"
+        "{% endif %}"
+        "{% set loop_messages = messages[1:] %}"
+        "{% else %}"
+        "{% set first_user_prefix = \"\" %}"
+        "{% set loop_messages = messages %}"
+        "{% endif %}"
+        "{% for message in loop_messages %}"
+        "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
+        "{{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}"
+        "{% endif %}"
+        "{% if (message['role'] == 'assistant') %}"
+        "{% set role = \"model\" %}"
+        "{% else %}"
+        "{% set role = message['role'] %}"
+        "{% endif %}"
+        "{{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}"
+        "{% if message['content'] is string %}"
+        "{{ message['content'] | trim }}"
+        "{% elif message['content'] is iterable %}"
+        "{% for item in message['content'] %}"
+        "{% if item['type'] == 'image_url' and item['image_url'] is string %}"
+        "{{ '\n\n' + item['image_url'] + '\n\n' }}"
+        "{% elif item['type'] == 'image_url' and item['image_url'] is mapping %}"
+        "{{ '\n\n' + item['image_url']['url'] + '\n\n' }}"
+        "{% elif item['type'] == 'text' %}"
+        "{{ item['text'] | trim }}"
+        "{% endif %}"
+        "{% endfor %}"
+        "{% else %}"
+        "{{ raise_exception(\"Invalid content type\") }}"
+        "{% endif %}"
+        "{{ '<end_of_turn>\n' }}"
+        "{% endfor %}"
+        "{% if add_generation_prompt %}"
+        "{{ '<start_of_turn>model\n' }}"
+        "{% endif %}"
+    )
+
+
 @register_chat_completion_handler("chatml-function-calling")
 def chatml_function_calling(
     llama: llama.Llama,
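
A hedged usage sketch for the new handler, mirroring how the existing Llava-style handlers are wired into a `Llama` instance (model and projector paths are placeholders):

```python
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Gemma3ChatHandler

handler = Gemma3ChatHandler(clip_model_path="mmproj-gemma3.gguf")  # placeholder path
llm = Llama(
    model_path="gemma-3-4b-it.gguf",  # placeholder path
    chat_handler=handler,
    n_ctx=4096,
)
response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            ],
        }
    ],
)
print(response["choices"][0]["message"]["content"])
```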