Tencent · anminliu · May 29, 2026 · May 27, 2026 · May 28, 2026
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "angelslim/compressor/sparsity/vecattention/ops/vllm-flash-attention"]
+	path = angelslim/compressor/sparsity/vecattention/ops/vllm-flash-attention
+	url = https://github.com/anminliu/vllm-flash-attention.git
diff --git a/README.md b/README.md
@@ -135,6 +135,12 @@ A more accessible, comprehensive, and efficient toolkit for large model compress
       </td>
       <td>
         <ul style="padding-left: 0; list-style-position: inside;">
+          <li>
+            <strong>Sparse Attention</strong>
+            <ul style="padding-left: 1.5rem">
+              <li><a href="https://github.com/anminliu/VecAttention">VecAttention</a></li>
+            </ul>
+          </li>
           <li>
             <strong>Token Pruning</strong>
             <ul style="padding-left: 1.5rem">

diff --git a/README_cn.md b/README_cn.md
@@ -136,6 +136,12 @@
       </td>
       <td>
         <ul style="padding-left: 0; list-style-position: inside;">
+          <li>
+            <strong>稀疏注意力</strong>
+            <ul style="padding-left: 1.5rem">
+              <li><a href="https://github.com/anminliu/VecAttention">VecAttention</a></li>
+            </ul>
+          </li>
           <li>
             <strong>Token剪枝</strong>
             <ul style="padding-left: 1.5rem">

diff --git a/angelslim/compressor/sparsity/__init__.py b/angelslim/compressor/sparsity/__init__.py
@@ -13,5 +13,6 @@
 # limitations under the License.
 
 from .stem import StemInference  # noqa: F401
+from .vecattention import VecAttentionInference  # noqa: F401
 
-__all__ = ["StemInference"]
+__all__ = ["StemInference", "VecAttentionInference"]
diff --git a/angelslim/compressor/sparsity/vecattention/__init__.py b/angelslim/compressor/sparsity/vecattention/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2025 Tencent Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .vecattention import VecAttentionInference  # noqa: F401
+
+__all__ = ["VecAttentionInference"]
diff --git a/angelslim/compressor/sparsity/vecattention/modules/__init__.py b/angelslim/compressor/sparsity/vecattention/modules/__init__.py
@@ -0,0 +1,19 @@
+# Copyright 2025 Tencent Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""VecAttention-patched attention forward methods for VLM."""
+
+from .forward import qwen_vl_attn_forward
+
+__all__ = ["qwen_vl_attn_forward"]