Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions scripts/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,11 @@ pipeline {
mkdir -p $PWD/Non_qaic_llm &&
export TOKENIZERS_PARALLELISM=false &&
export QEFF_HOME=$PWD/Non_qaic_llm &&
pytest tests -m '(llm_model) and (not qnn) and ${TEST_FILTER}' --ignore tests/vllm --ignore tests/unit_test --ignore tests/nightly_pipeline --junitxml=tests/tests_log2.xml --durations=10 &&
export QEFF_QAIC_DEVICE_POOL=0,1,2,3 &&
PYTEST_XDIST_ARGS='-n 4 --dist loadscope' &&
export QEFF_ENABLE_QAIC_DEVICE_ALLOCATOR=1 &&
if [ x${TEST_PROFILE} = xfull_layers_model ]; then PYTEST_XDIST_ARGS=''; unset QEFF_ENABLE_QAIC_DEVICE_ALLOCATOR; fi &&
pytest tests -m '(llm_model) and (not qnn) and ${TEST_FILTER}' --ignore tests/vllm --ignore tests/unit_test --ignore tests/nightly_pipeline \${PYTEST_XDIST_ARGS} --junitxml=tests/tests_log2.xml --durations=10 &&
junitparser merge tests/tests_log2.xml tests/tests_log.xml &&
deactivate"
'''
Expand All @@ -123,7 +127,11 @@ pipeline {
mkdir -p $PWD/Non_qaic_feature &&
export TOKENIZERS_PARALLELISM=false &&
export QEFF_HOME=$PWD/Non_qaic_feature &&
pytest tests -m '(on_qaic) and (feature) and (not qnn) and ${TEST_FILTER}' --ignore tests/transformers/sampler --ignore tests/vllm --ignore tests/unit_test --ignore tests/nightly_pipeline --junitxml=tests/tests_log2_feature.xml --durations=10 &&
export QEFF_QAIC_DEVICE_POOL=0,1,2,3 &&
PYTEST_XDIST_ARGS='-n 4 --dist loadscope' &&
export QEFF_ENABLE_QAIC_DEVICE_ALLOCATOR=1 &&
if [ x${TEST_PROFILE} = xfull_layers_model ]; then PYTEST_XDIST_ARGS=''; unset QEFF_ENABLE_QAIC_DEVICE_ALLOCATOR; fi &&
pytest tests -m '(on_qaic) and (feature) and (not qnn) and ${TEST_FILTER}' --ignore tests/transformers/sampler --ignore tests/vllm --ignore tests/unit_test --ignore tests/nightly_pipeline \${PYTEST_XDIST_ARGS} --junitxml=tests/tests_log2_feature.xml --durations=10 &&
junitparser merge tests/tests_log2_feature.xml tests/tests_log.xml &&
deactivate"
'''
Expand All @@ -141,7 +149,11 @@ pipeline {
mkdir -p $PWD/Non_cli_qaic_multimodal &&
export TOKENIZERS_PARALLELISM=false &&
export QEFF_HOME=$PWD/Non_cli_qaic_multimodal &&
pytest tests -m '(multimodal) and (not qnn) and ${TEST_FILTER}' --ignore tests/vllm --ignore tests/unit_test --ignore tests/nightly_pipeline --ignore tests/transformers/models/reranker/test_reranker_mad.py --junitxml=tests/tests_log6.xml --durations=10 &&
export QEFF_QAIC_DEVICE_POOL=0,1,2,3 &&
PYTEST_XDIST_ARGS='-n 4 --dist loadscope' &&
export QEFF_ENABLE_QAIC_DEVICE_ALLOCATOR=1 &&
if [ x${TEST_PROFILE} = xfull_layers_model ]; then PYTEST_XDIST_ARGS=''; unset QEFF_ENABLE_QAIC_DEVICE_ALLOCATOR; fi &&
pytest tests -m '(multimodal) and (not qnn) and ${TEST_FILTER}' --ignore tests/vllm --ignore tests/unit_test --ignore tests/nightly_pipeline --ignore tests/transformers/models/reranker/test_reranker_mad.py \${PYTEST_XDIST_ARGS} --junitxml=tests/tests_log6.xml --durations=10 &&
junitparser merge tests/tests_log6.xml tests/tests_log.xml &&
deactivate"
'''
Expand Down Expand Up @@ -179,7 +191,11 @@ pipeline {
export TOKENIZERS_PARALLELISM=false &&
export QEFF_HOME=$PWD/Non_cli_qaic_diffusion &&
export HF_HUB_CACHE=/huggingface_hub &&
pytest tests -m 'diffusion_models' --ignore tests/vllm --ignore tests/unit_test --ignore tests/nightly_pipeline --junitxml=tests/tests_log_diffusion.xml --durations=10 &&
export QEFF_QAIC_DEVICE_POOL=0,1,2,3 &&
PYTEST_XDIST_ARGS='-n 4 --dist loadscope' &&
export QEFF_ENABLE_QAIC_DEVICE_ALLOCATOR=1 &&
if [ x${TEST_PROFILE} = xfull_layers_model ]; then PYTEST_XDIST_ARGS=''; unset QEFF_ENABLE_QAIC_DEVICE_ALLOCATOR; fi &&
pytest tests -m 'diffusion_models' --ignore tests/vllm --ignore tests/unit_test --ignore tests/nightly_pipeline \${PYTEST_XDIST_ARGS} --junitxml=tests/tests_log_diffusion.xml --durations=10 &&
junitparser merge tests/tests_log_diffusion.xml tests/tests_log.xml &&
deactivate"
'''
Expand All @@ -200,7 +216,10 @@ pipeline {
mkdir -p $PWD/cli &&
export TOKENIZERS_PARALLELISM=false &&
export QEFF_HOME=$PWD/cli &&
pytest tests -m '(cli and not qnn) and (not finetune)' --ignore tests/vllm --ignore tests/unit_test --ignore tests/nightly_pipeline --junitxml=tests/tests_log3.xml --durations=10 &&
export QEFF_QAIC_DEVICE_POOL=0,1,2,3 &&
PYTEST_XDIST_ARGS='-n 4 --dist loadscope' &&
export QEFF_ENABLE_QAIC_DEVICE_ALLOCATOR=1 &&
pytest tests -m '(cli and not qnn) and (not finetune)' --ignore tests/vllm --ignore tests/unit_test --ignore tests/nightly_pipeline \${PYTEST_XDIST_ARGS} --junitxml=tests/tests_log3.xml --durations=10 &&
junitparser merge tests/tests_log3.xml tests/tests_log.xml &&
deactivate"
'''
Expand Down
25 changes: 16 additions & 9 deletions tests/cloud/test_export_compile_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import pytest
import yaml
from transformers import AutoConfig

import QEfficient
from QEfficient.cloud.execute import main as execute
Expand Down Expand Up @@ -38,14 +39,16 @@ def check_export_compile_execute(mocker, model_name, full_batch_size=None, enabl
base_key = "past_key."
base_value = "past_value."
precision = "float16"
config = AutoConfig.from_pretrained(model_name)
num_layers = getattr(config, "num_hidden_layers", getattr(config, "n_layer", 12))

data = []

for i in range(12):
for i in range(num_layers):
data.append({"IOName": f"{base_key}{i}", "Precision": precision})
data.append({"IOName": f"{base_value}{i}", "Precision": precision})

for i in range(12):
for i in range(num_layers):
data.append({"IOName": f"{base_key}{i}_RetainedState", "Precision": precision})
data.append({"IOName": f"{base_value}{i}_RetainedState", "Precision": precision})

Expand All @@ -61,8 +64,8 @@ def check_export_compile_execute(mocker, model_name, full_batch_size=None, enabl
aic_enable_depth_first=True,
mos=1,
batch_size=1,
prompt_len=32,
ctx_len=128,
prompt_len=8,
ctx_len=32,
mxfp6=True,
mxint8=True,
full_batch_size=full_batch_size,
Expand All @@ -77,7 +80,7 @@ def check_export_compile_execute(mocker, model_name, full_batch_size=None, enabl
qpc_path=qpc_path,
prompt="My name is",
prompts_txt_file_path="examples/sample_prompts/prompts.txt",
generation_len=20,
generation_len=4,
full_batch_size=full_batch_size,
)

Expand All @@ -89,27 +92,31 @@ def check_export_compile_execute(mocker, model_name, full_batch_size=None, enabl
@pytest.mark.cli
def test_export_compile_execute(mocker):
# testing export -> compile -> infer without full_batch_size
check_export_compile_execute(mocker, model_name="gpt2")
check_export_compile_execute(mocker, model_name="hf-internal-testing/tiny-random-GPT2LMHeadModel")


@pytest.mark.on_qaic
@pytest.mark.cli
def test_export_compile_execute_fbs(mocker):
# testing export -> compile -> infer with full_batch_size
check_export_compile_execute(mocker, model_name="gpt2", full_batch_size=3)
check_export_compile_execute(
mocker, model_name="hf-internal-testing/tiny-random-GPT2LMHeadModel", full_batch_size=3
)


@pytest.mark.on_qaic
@pytest.mark.qnn
@pytest.mark.cli
def test_export_compile_execute_qnn(mocker):
# testing export -> compile -> infer without full_batch_size in QNN environment
check_export_compile_execute(mocker, model_name="gpt2", enable_qnn=True)
check_export_compile_execute(mocker, model_name="hf-internal-testing/tiny-random-GPT2LMHeadModel", enable_qnn=True)


@pytest.mark.on_qaic
@pytest.mark.qnn
@pytest.mark.cli
def test_export_compile_execute_qnn_fbs(mocker):
# testing export -> compile -> infer with full_batch_size in QNN environment
check_export_compile_execute(mocker, model_name="gpt2", full_batch_size=3, enable_qnn=True)
check_export_compile_execute(
mocker, model_name="hf-internal-testing/tiny-random-GPT2LMHeadModel", full_batch_size=3, enable_qnn=True
)
25 changes: 18 additions & 7 deletions tests/cloud/test_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def check_infer(
mos=1,
hf_token=None,
batch_size=1,
prompt_len=32,
ctx_len=128,
prompt_len=8,
ctx_len=32,
generation_len=generation_len,
mxfp6=True,
mxint8=True,
Expand Down Expand Up @@ -70,30 +70,40 @@ def test_infer(mocker):
Ref: https://pytest-mock.readthedocs.io/en/latest/usage.html
"""
# testing infer without full_batch_size
check_infer(mocker, model_name="lu-vae/llama-68m-fft")
check_infer(mocker, model_name="hf-internal-testing/tiny-random-LlamaForCausalLM", generation_len=4)


@pytest.mark.on_qaic
@pytest.mark.cli
def test_infer_fbs(mocker):
# testing infer with full_batch_size
check_infer(mocker, model_name="lu-vae/llama-68m-fft", full_batch_size=3)
check_infer(
mocker, model_name="hf-internal-testing/tiny-random-LlamaForCausalLM", full_batch_size=3, generation_len=4
)


@pytest.mark.on_qaic
@pytest.mark.cli
@pytest.mark.qnn
def test_infer_qnn(mocker):
# testing infer without full_batch_size in QNN environment
check_infer(mocker, model_name="lu-vae/llama-68m-fft", enable_qnn=True)
check_infer(
mocker, model_name="hf-internal-testing/tiny-random-LlamaForCausalLM", enable_qnn=True, generation_len=4
)


@pytest.mark.on_qaic
@pytest.mark.cli
@pytest.mark.qnn
def test_infer_qnn_fbs(mocker):
# testing infer with full_batch_size in QNN environment
check_infer(mocker, model_name="lu-vae/llama-68m-fft", full_batch_size=3, enable_qnn=True)
check_infer(
mocker,
model_name="hf-internal-testing/tiny-random-LlamaForCausalLM",
full_batch_size=3,
enable_qnn=True,
generation_len=4,
)


@pytest.mark.on_qaic
Expand All @@ -102,9 +112,10 @@ def test_infer_vlm(mocker):
# testing infer for MM models
check_infer(
mocker,
model_name="llava-hf/llava-1.5-7b-hf",
model_name="tiny-random/gemma-3",
prompt="Describe the image.",
image_url="https://i.etsystatic.com/8155076/r/il/0825c2/1594869823/il_fullxfull.1594869823_5x0w.jpg",
generation_len=4,
)


Expand Down
14 changes: 7 additions & 7 deletions tests/configs/audio_model_configs.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"speech_seq2seq_models": [
"openai/whisper-tiny"
],
"audio_embedding_models": [
"facebook/wav2vec2-base-960h"
]
}
"speech_seq2seq_models": [
"hf-internal-testing/tiny-random-WhisperForConditionalGeneration"
],
"audio_embedding_models": [
"hf-internal-testing/tiny-random-wav2vec2"
]
}
13 changes: 9 additions & 4 deletions tests/configs/causal_model_configs.json
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@
],
"disaggregated_dummy_models": [
{
"model_name": "openai/gpt-oss-20b",
"model_name": "tiny-random/gpt-oss-bf16",
"model_type": "gpt_oss",
"tokenizer_id": "gpt2",
"additional_params": {
Expand All @@ -671,7 +671,7 @@
"num_local_experts": 4,
"head_dim": 32,
"max_position_embeddings": 512,
"vocab_size": 201088,
"vocab_size": 50257,
"sliding_window": 128
}
},
Expand Down Expand Up @@ -708,15 +708,20 @@
}
},
{
"model_name": "openai/gpt-oss-20b",
"model_name": "tiny-random/gpt-oss-bf16",
"model_type": "gpt_oss",
"additional_params": {
"num_hidden_layers": 2,
"hidden_size": 64,
"intermediate_size": 256,
"num_attention_heads": 2,
"num_key_value_heads": 1,
"num_local_experts": 4
"num_local_experts": 4,
"vocab_size": 8192,
"max_position_embeddings": 128,
"sliding_window": 128,
"pad_token_id": 0,
"eos_token_id": 0
}
}
]
Expand Down
69 changes: 4 additions & 65 deletions tests/configs/image_text_model_configs.json
Original file line number Diff line number Diff line change
Expand Up @@ -499,77 +499,16 @@
],
"image_text_subfunction_models":[
{
"model_name": "Qwen/Qwen2.5-VL-3B-Instruct",
"model_name": "optimum-intel-internal-testing/tiny-random-qwen2.5-vl",
"model_type": "qwen2_5_vl",
"batch_size": 1,
"prompt_len": 128,
"ctx_len": 4096,
"img_size": 1540,
"ctx_len": 512,
"img_size": 224,
"img_url": "https://picsum.photos/id/237/536/354",
"text_prompt": "Can you describe the image in detail.",
"num_layers": 1,
"additional_params": {
"dtype": "float32",
"hidden_size": 2048,
"intermediate_size": 11008,
"max_position_embeddings": 128000,
"max_window_layers": 70,
"num_attention_heads": 16,
"num_hidden_layers": 1,
"num_key_value_heads": 2,
"text_config": {
"architectures": [
"Qwen2_5_VLForConditionalGeneration"
],
"layer_types": [
"full_attention"
],
"dtype": "float32",
"hidden_size": 2048,
"intermediate_size": 11008,
"max_position_embeddings": 128000,
"max_window_layers": 70,
"model_type": "qwen2_5_vl_text",
"num_attention_heads": 16,
"num_hidden_layers": 1,
"num_key_value_heads": 2,
"rms_norm_eps": 1e-06,
"rope_scaling": {
"mrope_section": [
16,
24,
24
],
"rope_type": "default",
"type": "default"
},
"vocab_size": 151936
},
"vision_config": {
"depth": 1,
"num_hidden_layers": 1,
"hidden_act": "silu",
"hidden_size": 1280,
"intermediate_size": 3420,
"num_heads": 16,
"in_chans": 3,
"out_hidden_size": 2048,
"patch_size": 14,
"spatial_merge_size": 2,
"spatial_patch_size": 14,
"window_size": 112,
"fullatt_block_indexes": [
7,
15,
23,
31
],
"tokens_per_second": 2,
"temporal_patch_size": 2
},
"vision_start_token_id": 151652,
"vocab_size": 151936
}
"additional_params": {}
}
],
"image_text_custom_dtype_models":[
Expand Down
8 changes: 4 additions & 4 deletions tests/configs/sequence_model_configs.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"seq_classification_models": [
"meta-llama/Llama-Prompt-Guard-2-22M"
]
}
"seq_classification_models": [
"ydshieh/tiny-random-BertForSequenceClassification"
]
}
Loading
Loading