We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent fba2010 · commit cb206e6 — Copy full SHA for cb206e6
1 file changed
cpp/tensorrt_llm/thop/attentionOp.h
@@ -78,8 +78,7 @@ void attention(torch::Tensor q, std::optional<torch::Tensor> k, std::optional<to
78
std::optional<torch::Tensor> fmha_scheduler_counter, std::optional<torch::Tensor> mla_bmm1_scale,
79
std::optional<torch::Tensor> mla_bmm2_scale, std::optional<torch::Tensor> quant_q_buffer,
80
std::optional<torch::Tensor> flash_mla_tile_scheduler_metadata = std::nullopt,
81
- std::optional<torch::Tensor> flash_mla_num_splits = std::nullopt,
82
- int64_t num_contexts = 0,
+ std::optional<torch::Tensor> flash_mla_num_splits = std::nullopt, int64_t num_contexts = 0,
83
int64_t num_ctx_tokens = 0);
84
85
struct KvCachePoolPointers
0 commit comments