Skip to content

Tensor mismatch during run_hf_model_benchmark.py execution #12

@Nikita-Blago

Description

@Nikita-Blago

Running `PYTHONPATH=. python scripts/run_hf_model_benchmark.py --benchmark ttft --folder_suffix version_0` fails with the following error:

Traceback (most recent call last):
  File "/home/nikita_blago/mlstm_kernels/scripts/run_hf_model_benchmark.py", line 540, in <module>
    run_multiple_benchmarks(
  File "/home/nikita_blago/mlstm_kernels/scripts/run_hf_model_benchmark.py", line 492, in run_multiple_benchmarks
    _time_to_first_token_benchmark(
  File "/home/nikita_blago/mlstm_kernels/scripts/run_hf_model_benchmark.py", line 453, in _time_to_first_token_benchmark
    run_and_record_benchmarks(
  File "/home/nikita_blago/mlstm_kernels/mlstm_kernels/utils/benchmark/run_benchmark.py", line 180, in run_and_record_benchmarks
    result_df = run_benchmarks_fn(
                ^^^^^^^^^^^^^^^^^^
  File "/home/nikita_blago/mlstm_kernels/mlstm_kernels/utils/benchmark/run_benchmark.py", line 102, in run_model_benchmarks
    benchmark.setup_model()
  File "/home/nikita_blago/mlstm_kernels/mlstm_kernels/utils/benchmark/benchmarks/huggingface_model_benchmark.py", line 163, in setup_model
    self.model = AutoModelForCausalLM.from_config(self.hf_model_config)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 456, in from_config
    return model_class._from_config(config, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/modeling_utils.py", line 316, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2381, in _from_config
    model = cls(config, **kwargs)
            ^^^^^^^^^^^^^^^^^^^^^
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/models/xlstm/modeling_xlstm.py", line 1518, in __init__
    self.backbone = xLSTMModel(config)
                    ^^^^^^^^^^^^^^^^^^
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/models/xlstm/modeling_xlstm.py", line 1386, in __init__
    self.post_init()
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2262, in post_init
    self.init_weights()
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3646, in init_weights
    self.initialize_weights()
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2989, in initialize_weights
    self.smart_apply(self._initialize_weights)
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2982, in smart_apply
    module.smart_apply(fn)
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2982, in smart_apply
    module.smart_apply(fn)
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2982, in smart_apply
    module.smart_apply(fn)
  [Previous line repeated 1 more time]
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2983, in smart_apply
    fn(self)
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2957, in _initialize_weights
    self._init_weights(module)
  File "/nfs-gpu/xlstm/miniforge3/envs/tunableop/lib/python3.11/site-packages/transformers/models/xlstm/modeling_xlstm.py", line 1269, in _init_weights
    module.bias[: self.config.num_heads] += -module.bias[
                                            ^^^^^^^^^^^^^
RuntimeError: The size of tensor a (8) must match the size of tensor b (16) at non-singleton dimension 0

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions