Skip to content

Commit f8ca118

Browse files
committed
add patch
1 parent 41706fd commit f8ca118

2 files changed

Lines changed: 36 additions & 0 deletions

File tree

benchmarks/single_node/multiturn_fp4_b200_homogeneous_aiperf.sh

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,24 @@ AIPERF_DIR="$MULTITURN_DIR/aiperf"
4343

4444
pip install --quiet urllib3 requests 2>/dev/null || true
4545

46+
# Patch vLLM bug: under high load the amount added to the local_cache_hit
# counter can be computed as a negative number, which crashes with
# "Counters can only be incremented by non-negative amounts".

#######################################
# Clamp the local_cache_hit increment in a vLLM stats.py to be >= 0.
#
# Rewrites the opening token 'self.local_cache_hit += (' to
# 'self.local_cache_hit += max(0,' so that the expression's existing closing
# parenthesis now closes the max() call. Only the opening token is matched,
# so the patch does not depend on how the wrapped expression is indented,
# and the same token is used for both the "needs patching" check and the
# rewrite. Once patched, the token is gone, so re-running is a no-op.
#
# Arguments: $1 - path to the stats.py file to patch
# Outputs:   a "Patching ..." line on stdout when the file is rewritten
# Returns:   0 when patched or when there is nothing to patch (empty path,
#            missing file, token absent); non-zero if the rewrite fails
#######################################
patch_vllm_local_cache_hit() {
  local stats_file=$1

  # Best effort: no vLLM install / no stats.py means nothing to do.
  if [ -z "$stats_file" ] || [ ! -f "$stats_file" ]; then
    return 0
  fi
  # Already patched, or this vLLM version does not contain the statement.
  if ! grep -qF -- 'self.local_cache_hit += (' "$stats_file"; then
    return 0
  fi

  echo "Patching vLLM stats.py: $stats_file"
  # Quoted here-doc delimiter: the Python source is passed through verbatim,
  # with no shell expansion or backslash processing.
  python3 - "$stats_file" <<'PY'
import sys

path = sys.argv[1]
old = "self.local_cache_hit += ("
new = "self.local_cache_hit += max(0,"

with open(path, encoding="utf-8") as f:
    src = f.read()

with open(path, "w", encoding="utf-8") as f:
    f.write(src.replace(old, new))
PY
}

# Locate the installed vLLM's v1/metrics/stats.py; empty when vLLM (or
# python3) is unavailable, in which case the patch step is skipped.
STATS_FILE=$(python3 -c "import vllm; import os; print(os.path.join(os.path.dirname(vllm.__file__), 'v1', 'metrics', 'stats.py'))" 2>/dev/null || echo "")
patch_vllm_local_cache_hit "$STATS_FILE"
63+
4664
# ---- Conversation count ----------------------------------------------------
4765
if [ -n "${DURATION:-}" ]; then
4866
CONV_COUNT=10000

benchmarks/single_node/multiturn_fp8_h200_homogeneous_aiperf.sh

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,24 @@ AIPERF_DIR="$MULTITURN_DIR/aiperf"
4343

4444
pip install --quiet urllib3 requests 2>/dev/null || true
4545

46+
# Patch vLLM bug: under high load the amount added to the local_cache_hit
# counter can be computed as a negative number, which crashes with
# "Counters can only be incremented by non-negative amounts".

#######################################
# Clamp the local_cache_hit increment in a vLLM stats.py to be >= 0.
#
# Rewrites the opening token 'self.local_cache_hit += (' to
# 'self.local_cache_hit += max(0,' so that the expression's existing closing
# parenthesis now closes the max() call. Only the opening token is matched,
# so the patch does not depend on how the wrapped expression is indented,
# and the same token is used for both the "needs patching" check and the
# rewrite. Once patched, the token is gone, so re-running is a no-op.
#
# Arguments: $1 - path to the stats.py file to patch
# Outputs:   a "Patching ..." line on stdout when the file is rewritten
# Returns:   0 when patched or when there is nothing to patch (empty path,
#            missing file, token absent); non-zero if the rewrite fails
#######################################
patch_vllm_local_cache_hit() {
  local stats_file=$1

  # Best effort: no vLLM install / no stats.py means nothing to do.
  if [ -z "$stats_file" ] || [ ! -f "$stats_file" ]; then
    return 0
  fi
  # Already patched, or this vLLM version does not contain the statement.
  if ! grep -qF -- 'self.local_cache_hit += (' "$stats_file"; then
    return 0
  fi

  echo "Patching vLLM stats.py: $stats_file"
  # Quoted here-doc delimiter: the Python source is passed through verbatim,
  # with no shell expansion or backslash processing.
  python3 - "$stats_file" <<'PY'
import sys

path = sys.argv[1]
old = "self.local_cache_hit += ("
new = "self.local_cache_hit += max(0,"

with open(path, encoding="utf-8") as f:
    src = f.read()

with open(path, "w", encoding="utf-8") as f:
    f.write(src.replace(old, new))
PY
}

# Locate the installed vLLM's v1/metrics/stats.py; empty when vLLM (or
# python3) is unavailable, in which case the patch step is skipped.
STATS_FILE=$(python3 -c "import vllm; import os; print(os.path.join(os.path.dirname(vllm.__file__), 'v1', 'metrics', 'stats.py'))" 2>/dev/null || echo "")
patch_vllm_local_cache_hit "$STATS_FILE"
63+
4664
# ---- Conversation count ----------------------------------------------------
4765
if [ -n "${DURATION:-}" ]; then
4866
CONV_COUNT=10000

0 commit comments

Comments
 (0)