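Could anyone help me with a potential bug in the CLIP text-encoding path (`encode_with_transformer` -> `text_transformer_forward` in `tools/modules/clip_embedder.py`)? I have set up all of the required utilities, but inference fails with the error below.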
/VGen/tools/modules/clip_embedder.py line 289: RuntimeError: Mask shape should match input
Traceback (most recent call last):
File "/home/wenhao/Project/greatxue/mj_models/VGen/utils/registry.py", line 67, in build_from_config
return req_type_entry(**cfg)
File "/home/wenhao/Project/greatxue/mj_models/VGen/tools/inferences/inference_instructvideo_entrance.py", line 83, in inference_instructvideo_entrance
mp.spawn(worker, nprocs=cfg.gpus_per_machine, args=(cfg, ))
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 3 terminated with the following error:
Traceback (most recent call last):
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/wenhao/Project/greatxue/mj_models/VGen/tools/inferences/inference_instructvideo_entrance.py", line 205, in worker
y = clip_encoder(captions).detach() # bs * 77 *1024 [80, 77, 1024]
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/wenhao/Project/greatxue/mj_models/VGen/tools/modules/clip_embedder.py", line 272, in forward
z = self.encode_with_transformer(tokens.to(self.device))
File "/home/wenhao/Project/greatxue/mj_models/VGen/tools/modules/clip_embedder.py", line 284, in encode_with_transformer
x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask)
File "/home/wenhao/Project/greatxue/mj_models/VGen/tools/modules/clip_embedder.py", line 296, in text_transformer_forward
x = r(x, attn_mask=attn_mask)
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/open_clip/transformer.py", line 263, in forward
x = q_x + self.ls_1(self.attention(q_x=self.ln_1(q_x), k_x=k_x, v_x=v_x, attn_mask=attn_mask))
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/open_clip/transformer.py", line 250, in attention
return self.attn(
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/nn/modules/activation.py", line 1113, in forward
return torch._native_multi_head_attention(
RuntimeError: Mask shape should match input
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "inference.py", line 18, in <module>
INFER_ENGINE.build(dict(type=cfg_update.TASK_TYPE), cfg_update=cfg_update.cfg_dict)
File "/home/wenhao/Project/greatxue/mj_models/VGen/utils/registry.py", line 107, in build
return self.build_func(*args, **kwargs, registry=self)
File "/home/wenhao/Project/greatxue/mj_models/VGen/utils/registry_class.py", line 7, in build_func
return build_from_config(cfg, registry, **kwargs)
File "/home/wenhao/Project/greatxue/mj_models/VGen/utils/registry.py", line 69, in build_from_config
raise Exception(f"Failed to invoke function {req_type_entry}, with {e}")
Exception: Failed to invoke function <function inference_instructvideo_entrance at 0x7f9d7c7a6700>, with
-- Process 3 terminated with the following error:
Traceback (most recent call last):
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/wenhao/Project/greatxue/mj_models/VGen/tools/inferences/inference_instructvideo_entrance.py", line 205, in worker
y = clip_encoder(captions).detach() # bs * 77 *1024 [80, 77, 1024]
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/wenhao/Project/greatxue/mj_models/VGen/tools/modules/clip_embedder.py", line 272, in forward
z = self.encode_with_transformer(tokens.to(self.device))
File "/home/wenhao/Project/greatxue/mj_models/VGen/tools/modules/clip_embedder.py", line 284, in encode_with_transformer
x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask)
File "/home/wenhao/Project/greatxue/mj_models/VGen/tools/modules/clip_embedder.py", line 296, in text_transformer_forward
x = r(x, attn_mask=attn_mask)
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/open_clip/transformer.py", line 263, in forward
x = q_x + self.ls_1(self.attention(q_x=self.ln_1(q_x), k_x=k_x, v_x=v_x, attn_mask=attn_mask))
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/open_clip/transformer.py", line 250, in attention
return self.attn(
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/wenhao/.conda/envs/vgen2/lib/python3.8/site-packages/torch/nn/modules/activation.py", line 1113, in forward
return torch._native_multi_head_attention(
RuntimeError: Mask shape should match input
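Could anyone help me with this potential bug? As the traceback above shows, the failure occurs in `text_transformer_forward` (called from `encode_with_transformer` in `tools/modules/clip_embedder.py`) when the attention mask is passed to the open_clip transformer block. I have set up all of the required utilities.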
/VGen/tools/modules/clip_embedder.py line 289: RuntimeError: Mask shape should match input