Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5fc4f53
rebase(transformers): align modeling wrappers, cache_utils and other …
vbaddi May 25, 2026
69bf9b9
fix: improve weight offloading to handle plain tensor attrs and use t…
quic-rishinr May 26, 2026
6fc9779
fix retry timeout (#1012)
quic-mamta May 27, 2026
53c2864
Change CCL input precision from int8 to int64 to align with compiler …
vjanfaza Jun 1, 2026
f823cde
Send device_ids as int to qaicrt.Program when len(device_ids) == 1 (#…
quic-sanising Jun 2, 2026
f0fdd41
Add GLM4-MOE Mode w/Disaggregated Prefill and Decode Support (#988)
vbaddi Jun 2, 2026
e209f9e
support multiple TLM decode specializations via num_speculative_token…
eplatero97 Jun 2, 2026
7687e2f
Fix for fp16/bf16 export & compile in qwen3vl & qwen3vlmoe models (#980)
qcdipankar Jun 2, 2026
385f455
Enabling support of rerankers models 2B and 8B of qwen3vl (#921)
quic-amitraj Jun 2, 2026
e3f7d01
feat(moe): NSP-blocked expert dispatch for Qwen3MOE and GPT-OSS prefi…
vbaddi Jun 3, 2026
bf25dad
qwen3_5_linear_attn (#901)
mohiso22 Jun 3, 2026
4e0f98c
Gemma4 (#966)
tchawada Jun 3, 2026
f776e80
Enabled Qwen3-VL embedding model (#923)
quic-amitraj Jun 3, 2026
6ccce3b
Deprecate the mllama 3.2 model (llama3.2 vision) (#1018)
quic-hemagnih Jun 3, 2026
6d144c6
fix(0306): ONNX reuse for disaggregated compile and diffusion module …
vbaddi Jun 3, 2026
1e9aa01
test(0306): Add enable_proxy coverage across supported model categori…
vbaddi Jun 4, 2026
065543d
[Nightly]: New Models are added in the Nightly List (#1034)
abukhoy Jun 4, 2026
6b8db51
Added layerwise changes for qwen_3_vl_moe, qwen_3_moe and qwen_3_5 (#…
abhishek-singh591 Jun 4, 2026
dc3d674
Bug Fix for Qwen3 VL Moe acc issue (#1024)
tv-karthikeya Jun 5, 2026
6c72846
Reverting past_seen_token calculation to based on cache_position (#1032)
asmigosw Jun 5, 2026
c967121
Added the fix for layerwise (#1040)
abhishek-singh591 Jun 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ jobs:
python-version: "3.11"

- name: Install package and test dependencies
run: pip install -e ".[test]"
run: |
pip install -e ".[test]"
pip install onnx_ir

- name: Run unit tests
env:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ repos:
# Run the linter.
- id: ruff
types_or: [ python, pyi, jupyter ]
args: [ --fix ]
args: [ --fix, --ignore, F ]
# Run the formatter.
- id: ruff-format
types_or: [ python, pyi, jupyter ]
45 changes: 39 additions & 6 deletions QEfficient/__init__.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,25 @@
# ----------------------------------------------------------------------------- #
# Placeholder for all non-transformer models registered in QEfficient
import warnings # noqa: I001
import transformers
import transformers.utils as transformers_utils

try:
from transformers import HybridCache as _TransformersHybridCache # noqa: F401
except ImportError:
from transformers.cache_utils import DynamicCache

class HybridCache(DynamicCache):
pass

class HybridChunkedCache(HybridCache):
pass

transformers.HybridCache = HybridCache
transformers.HybridChunkedCache = HybridChunkedCache

if not hasattr(transformers_utils, "FLAX_WEIGHTS_NAME"):
transformers_utils.FLAX_WEIGHTS_NAME = "flax_model.msgpack"
import QEfficient.utils.model_registery # noqa: F401
from QEfficient.base import (
QEFFAutoModel,
Expand All @@ -29,16 +47,27 @@
QEFFCommonLoader,
)
from QEfficient.compile.compile_helper import compile
from QEfficient.diffusers.pipelines.flux.pipeline_flux import QEffFluxPipeline
from QEfficient.diffusers.pipelines.wan.pipeline_wan import QEffWanPipeline
from QEfficient.diffusers.pipelines.wan.pipeline_wan_i2v import QEffWanImageToVideoPipeline
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
from QEfficient.peft import QEffAutoPeftModelForCausalLM
from QEfficient.transformers.transform import transform
from QEfficient.utils import custom_format_warning
from QEfficient.utils.logging_utils import logger

try:
from QEfficient.diffusers.pipelines.flux.pipeline_flux import QEffFluxPipeline
from QEfficient.diffusers.pipelines.wan.pipeline_wan import QEffWanPipeline
from QEfficient.diffusers.pipelines.wan.pipeline_wan_i2v import QEffWanImageToVideoPipeline
except Exception:
QEffFluxPipeline = None
QEffWanPipeline = None
QEffWanImageToVideoPipeline = None

try:
from QEfficient.peft import QEffAutoPeftModelForCausalLM
except Exception:
QEffAutoPeftModelForCausalLM = None

# Custom warning format for a better logging experience
warnings.formatwarning = custom_format_warning

Expand All @@ -58,11 +87,15 @@
"QEFFAutoModelForSequenceClassification",
"QEFFAutoModelForSpeechSeq2Seq",
"QEFFCommonLoader",
"QEffFluxPipeline",
"QEffWanPipeline",
"QEffWanImageToVideoPipeline",
]

if QEffFluxPipeline is not None:
__all__.append("QEffFluxPipeline")
if QEffWanPipeline is not None:
__all__.append("QEffWanPipeline")
if QEffWanImageToVideoPipeline is not None:
__all__.append("QEffWanImageToVideoPipeline")


# Conditionally import QAIC-related modules if the SDK is installed
__version__ = "1.22.0.dev0"
Expand Down
Loading