From 0f96127dd6109f47c71391fe4da10ed7a9835726 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Mon, 20 Apr 2026 11:48:01 +0000 Subject: [PATCH] CB Bug fix for Qwen3VL Dense and basic cleaning of example script and model file Signed-off-by: Dipankar Sarkar --- .../models/qwen3_vl/modeling_qwen3_vl.py | 11 ++--------- .../models/qwen3vl/qwen3_vl.py | 18 +----------------- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/QEfficient/transformers/models/qwen3_vl/modeling_qwen3_vl.py b/QEfficient/transformers/models/qwen3_vl/modeling_qwen3_vl.py index b47eab55e4..4ba8d1ff66 100644 --- a/QEfficient/transformers/models/qwen3_vl/modeling_qwen3_vl.py +++ b/QEfficient/transformers/models/qwen3_vl/modeling_qwen3_vl.py @@ -388,12 +388,6 @@ def forward( key_states = self.k_norm(self.k_proj(hidden_states).view(hidden_shape)).transpose(1, 2) value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2) - # cos, sin = position_embeddings - # kv_seq_len = key_states.shape[-2] - # kv_seq_len = past_key_value.get_seq_length() - past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 - # cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) - query_states, key_states = qeff_apply_rotary_pos_emb( query_states, key_states, @@ -409,7 +403,6 @@ def forward( "cos": cos_cached, "batch_index": batch_index, "position_ids": position_ids[0], - "past_seen_tokens": past_seen_tokens, } if comp_ctx_lengths is not None: attention_mask = attention_mask[:, :, :, : comp_ctx_lengths.shape[-1]] @@ -503,7 +496,7 @@ def forward( residual = hidden_states hidden_states = self.post_attention_layernorm(hidden_states) hidden_states = self.mlp(hidden_states) - hidden_states = residual + hidden_states[0] + hidden_states = residual + hidden_states outputs = (hidden_states,) @@ -667,7 +660,7 @@ class QEffQwen3VLDecoderWrapper(nn.Module): def __init__(self, model): super().__init__() self.model = model - self.language_model = self.model.model + self.language_model = self.model.model.language_model def get_submodules_for_export(self) -> Type[nn.Module]: """ diff --git a/examples/image_text_to_text/models/qwen3vl/qwen3_vl.py b/examples/image_text_to_text/models/qwen3vl/qwen3_vl.py index 962daaa52a..eed98e6e0c 100644 --- a/examples/image_text_to_text/models/qwen3vl/qwen3_vl.py +++ b/examples/image_text_to_text/models/qwen3vl/qwen3_vl.py @@ -45,7 +45,7 @@ aic_enable_depth_first=True, skip_vision=True, mos=1, - use_onnx_subfunctions=True, + use_onnx_subfunctions=False, ) messages = [ @@ -84,8 +84,6 @@ num_devices=4, height=354, width=536, - # height=1024, - # width=1024, mxfp6_matmul=True, mxint8_kv_cache=True, aic_enable_depth_first=True, @@ -95,10 +93,6 @@ ### IMAGE + TEXT ### image_url = "https://picsum.photos/id/237/536/354" - # image_url = ( - # "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/cat_style_layout.png" - # ) - image = Image.open(requests.get(image_url, stream=True).raw) messages_1 = [ @@ -111,16 +105,6 @@ }, ] - # messages_2 = [ - # { - # "role": "user", - # "content": [ - # {"type": "image", "image": image}, - # {"type": "text", "text": "Describe about the color of the dog."}, - # ], - # }, - # ] - messages = [messages_1] * batch_size texts = [processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True) for msg in messages]