Skip to content

Commit a9b480f

Browse files
authored
feat: add Gemma 4 multimodal chat support (abetlen#2241)
1 parent 927b574 commit a9b480f

4 files changed

Lines changed: 60 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
- feat: add Gemma 4 multimodal chat support by @abetlen in #2241
1011
- feat(ci): add CUDA 13.0 and 13.2 wheel builds by @abetlen in #2239
1112
- feat(ci): add CUDA 11.8 wheel builds by @abetlen in #2238
1213
- fix(ci): add Pascal compute capability targets to CUDA wheel builds by @abetlen in #2237

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,7 @@ Below are the supported multi-modal models and their respective chat handlers (P
510510
| [llama-3-vision-alpha](https://huggingface.co/abetlen/llama-3-vision-alpha-gguf) | `Llama3VisionAlphaChatHandler` | `llama-3-vision-alpha` |
511511
| [minicpm-v-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) | `MiniCPMv26ChatHandler` | `minicpm-v-2.6` |
512512
| [qwen2.5-vl](https://huggingface.co/unsloth/Qwen2.5-VL-3B-Instruct-GGUF) | `Qwen25VLChatHandler` | `qwen2.5-vl` |
513+
| [gemma-4](https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF) | `Gemma4ChatHandler` | `gemma4` |
513514

514515
Then you'll need to use a custom chat handler to load the clip model and process the chat messages and images.
515516

llama_cpp/llama_chat_format.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3265,6 +3265,50 @@ def from_pretrained(
32653265
)
32663266

32673267

3268+
class Gemma4ChatHandler(Llava15ChatHandler):
# Chat handler for Gemma 4 multimodal models. It only overrides two class
# attributes of Llava15ChatHandler; all other behaviour is inherited.
#
# NOTE(review): DEFAULT_SYSTEM_MESSAGE = None presumably turns off the default
# system prompt supplied by the base class -- confirm against
# Llava15ChatHandler, which is not visible in this hunk.
3269+
DEFAULT_SYSTEM_MESSAGE = None
3270+
3271+
# CHAT_FORMAT is a Jinja template assembled from adjacent string literals.
# The literals are ordinary (non-raw) Python strings, so every "\n" below is a
# real newline character inside the template text.
#
# What the template renders:
#   * A leading 'system' message is not emitted as a turn of its own. Its text
#     (the string content, or the 'text' of the first content part) plus a
#     blank line is prepended to the first rendered turn.
#   * The remaining messages must alternate user/assistant starting with
#     'user'; otherwise raise_exception is called.
#   * The 'assistant' role is written as 'model'.
#   * Every turn is wrapped as '<start_of_turn>' + role + newline ... then
#     '<end_of_turn>' + newline.
#   * String content is trimmed. For list content, 'text' parts are trimmed
#     and 'image_url' parts (a plain string or a mapping with a 'url' key) are
#     emitted surrounded by blank lines. Parts of any other type produce no
#     output because the inner if/elif chain has no else branch.
#   * Content that is neither a string nor iterable calls
#     raise_exception('Invalid content type').
#   * When add_generation_prompt is truthy, '<start_of_turn>model' + newline
#     is appended.
CHAT_FORMAT = (
3272+
"{% if messages and messages[0]['role'] == 'system' %}"
3273+
"{% if messages[0]['content'] is string %}"
3274+
"{% set first_user_prefix = messages[0]['content'] + '\n\n' %}"
3275+
"{% else %}"
3276+
"{% set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' %}"
3277+
"{% endif %}"
3278+
"{% set loop_messages = messages[1:] %}"
3279+
"{% else %}"
3280+
"{% set first_user_prefix = '' %}"
3281+
"{% set loop_messages = messages %}"
3282+
"{% endif %}"
3283+
"{% for message in loop_messages %}"
# Even positions (loop.index0 0, 2, ...) must be 'user'; odd positions must not.
3284+
"{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
3285+
"{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
3286+
"{% endif %}"
3287+
"{% set role = 'model' if message['role'] == 'assistant' else message['role'] %}"
# The system-derived prefix is attached only to the first looped message.
3288+
"{{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else '') }}"
# The string test comes first because a string would also satisfy 'is iterable'.
3289+
"{% if message['content'] is string %}"
3290+
"{{ message['content'] | trim }}"
3291+
"{% elif message['content'] is iterable %}"
3292+
"{% for item in message['content'] %}"
3293+
"{% if item['type'] == 'image_url' and item['image_url'] is string %}"
3294+
"{{ '\n\n' + item['image_url'] + '\n\n' }}"
3295+
"{% elif item['type'] == 'image_url' and item['image_url'] is mapping %}"
3296+
"{{ '\n\n' + item['image_url']['url'] + '\n\n' }}"
3297+
"{% elif item['type'] == 'text' %}"
3298+
"{{ item['text'] | trim }}"
3299+
"{% endif %}"
3300+
"{% endfor %}"
3301+
"{% else %}"
3302+
"{{ raise_exception('Invalid content type') }}"
3303+
"{% endif %}"
3304+
"{{ '<end_of_turn>\n' }}"
3305+
"{% endfor %}"
3306+
"{% if add_generation_prompt %}"
3307+
"{{ '<start_of_turn>model\n' }}"
3308+
"{% endif %}"
3309+
)
3310+
3311+
32683312
class ObsidianChatHandler(Llava15ChatHandler):
32693313
# Prompt Format
32703314
# The model followed ChatML format. However, with ### as the separator

llama_cpp/server/model.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,20 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
115115
chat_handler = llama_cpp.llama_chat_format.Llava16ChatHandler(
116116
clip_model_path=settings.clip_model_path, verbose=settings.verbose
117117
)
118+
elif settings.chat_format == "gemma4":
119+
assert settings.clip_model_path is not None, "clip model not found"
120+
if settings.hf_model_repo_id is not None:
121+
chat_handler = (
122+
llama_cpp.llama_chat_format.Gemma4ChatHandler.from_pretrained(
123+
repo_id=settings.hf_model_repo_id,
124+
filename=settings.clip_model_path,
125+
verbose=settings.verbose,
126+
)
127+
)
128+
else:
129+
chat_handler = llama_cpp.llama_chat_format.Gemma4ChatHandler(
130+
clip_model_path=settings.clip_model_path, verbose=settings.verbose
131+
)
118132
elif settings.chat_format == "moondream":
119133
assert settings.clip_model_path is not None, "clip model not found"
120134
if settings.hf_model_repo_id is not None:

0 commit comments

Comments
 (0)